diff options
Diffstat (limited to 'numpy/random/src')
100 files changed, 16834 insertions, 0 deletions
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.c b/numpy/random/src/aligned_malloc/aligned_malloc.c new file mode 100644 index 000000000..6e8192cfb --- /dev/null +++ b/numpy/random/src/aligned_malloc/aligned_malloc.c @@ -0,0 +1,9 @@ +#include "aligned_malloc.h" + +static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n); + +static NPY_INLINE void *PyArray_malloc_aligned(size_t n); + +static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s); + +static NPY_INLINE void PyArray_free_aligned(void *p);
\ No newline at end of file diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.h b/numpy/random/src/aligned_malloc/aligned_malloc.h new file mode 100644 index 000000000..ea24f6d23 --- /dev/null +++ b/numpy/random/src/aligned_malloc/aligned_malloc.h @@ -0,0 +1,54 @@ +#ifndef _RANDOMDGEN__ALIGNED_MALLOC_H_ +#define _RANDOMDGEN__ALIGNED_MALLOC_H_ + +#include "Python.h" +#include "numpy/npy_common.h" + +#define NPY_MEMALIGN 16 /* 16 for SSE2, 32 for AVX, 64 for Xeon Phi */ + +static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n) +{ + void *p1, **p2, *base; + size_t old_offs, offs = NPY_MEMALIGN - 1 + sizeof(void *); + if (NPY_UNLIKELY(p != NULL)) + { + base = *(((void **)p) - 1); + if (NPY_UNLIKELY((p1 = PyMem_Realloc(base, n + offs)) == NULL)) + return NULL; + if (NPY_LIKELY(p1 == base)) + return p; + p2 = (void **)(((Py_uintptr_t)(p1) + offs) & ~(NPY_MEMALIGN - 1)); + old_offs = (size_t)((Py_uintptr_t)p - (Py_uintptr_t)base); + memmove((void *)p2, ((char *)p1) + old_offs, n); + } + else + { + if (NPY_UNLIKELY((p1 = PyMem_Malloc(n + offs)) == NULL)) + return NULL; + p2 = (void **)(((Py_uintptr_t)(p1) + offs) & ~(NPY_MEMALIGN - 1)); + } + *(p2 - 1) = p1; + return (void *)p2; +} + +static NPY_INLINE void *PyArray_malloc_aligned(size_t n) +{ + return PyArray_realloc_aligned(NULL, n); +} + +static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s) +{ + void *p; + if (NPY_UNLIKELY((p = PyArray_realloc_aligned(NULL, n * s)) == NULL)) + return NULL; + memset(p, 0, n * s); + return p; +} + +static NPY_INLINE void PyArray_free_aligned(void *p) +{ + void *base = *(((void **)p) - 1); + PyMem_Free(base); +} + +#endif diff --git a/numpy/random/src/common/LICENSE.md b/numpy/random/src/common/LICENSE.md new file mode 100644 index 000000000..71bf8cf46 --- /dev/null +++ b/numpy/random/src/common/LICENSE.md @@ -0,0 +1,29 @@ +ISO C9x compliant stdint.h for Microsoft Visual Studio +Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 + 
+Copyright (c) 2006-2013 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the product nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file diff --git a/numpy/random/src/common/inttypes.h b/numpy/random/src/common/inttypes.h new file mode 100644 index 000000000..8f8b61108 --- /dev/null +++ b/numpy/random/src/common/inttypes.h @@ -0,0 +1,306 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" 
+#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define 
SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions 
+#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ]
\ No newline at end of file diff --git a/numpy/random/src/common/stdint.h b/numpy/random/src/common/stdint.h new file mode 100644 index 000000000..710de1570 --- /dev/null +++ b/numpy/random/src/common/stdint.h @@ -0,0 +1,258 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#if _MSC_VER >= 1600 // [ +#include <stdint.h> +#else // ] _MSC_VER >= 1600 [ + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +#include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +#if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +#define _W64 __w64 +#else +#define _W64 +#endif +#endif + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. 
+#if (_MSC_VER < 1300) +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#else +typedef signed __int8 int8_t; +typedef signed __int16 int16_t; +typedef signed __int32 int32_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ +typedef signed __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ +typedef _W64 signed int intptr_t; +typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || \ + defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and + // footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX 
+#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +#define INTPTR_MIN INT64_MIN +#define INTPTR_MAX INT64_MAX +#define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +#define INTPTR_MIN INT32_MIN +#define INTPTR_MAX INT32_MAX +#define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +#define PTRDIFF_MIN _I64_MIN +#define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +#define PTRDIFF_MIN _I32_MIN +#define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +#ifdef _WIN64 // [ +#define SIZE_MAX _UI64_MAX +#else // _WIN64 ][ +#define SIZE_MAX _UI32_MAX +#endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also 
defined in <wchar.h> +#ifndef WCHAR_MIN // [ +#define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +#define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || \ + defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +#define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +#define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ]
\ No newline at end of file diff --git a/numpy/random/src/distributions/LICENSE.md b/numpy/random/src/distributions/LICENSE.md new file mode 100644 index 000000000..31576ba4b --- /dev/null +++ b/numpy/random/src/distributions/LICENSE.md @@ -0,0 +1,61 @@ +## NumPy + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +## Julia + +The ziggurat methods were derived from Julia. + +Copyright (c) 2009-2019: Jeff Bezanson, Stefan Karpinski, Viral B. 
Shah, +and other contributors: + +https://github.com/JuliaLang/julia/contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/numpy/random/src/distributions/binomial.h b/numpy/random/src/distributions/binomial.h new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/numpy/random/src/distributions/binomial.h diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c new file mode 100644 index 000000000..83806de38 --- /dev/null +++ b/numpy/random/src/distributions/distributions.c @@ -0,0 +1,1811 @@ +#include "distributions.h" +#include "ziggurat.h" +#include "ziggurat_constants.h" + +#if defined(_MSC_VER) && defined(_WIN64) +#include <intrin.h> +#endif + +/* Random generators for external use */ +float random_float(brng_t *brng_state) { + return next_float(brng_state); +} + +double random_double(brng_t *brng_state) { + return next_double(brng_state); +} + +static NPY_INLINE double next_standard_exponential(brng_t *brng_state) { + return -log(1.0 - next_double(brng_state)); +} + +double random_standard_exponential(brng_t *brng_state) { + return next_standard_exponential(brng_state); +} + +void random_standard_exponential_fill(brng_t *brng_state, npy_intp cnt, + double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_standard_exponential(brng_state); + } +} + +float random_standard_exponential_f(brng_t *brng_state) { + return -logf(1.0f - next_float(brng_state)); +} + +void random_double_fill(brng_t *brng_state, npy_intp cnt, double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_double(brng_state); + } +} +#if 0 +double random_gauss(brng_t *brng_state) { + if (brng_state->has_gauss) { + const double temp = brng_state->gauss; + brng_state->has_gauss = false; + brng_state->gauss = 0.0; + return temp; + } else { + double f, x1, x2, r2; + + do { + x1 = 2.0 * next_double(brng_state) - 1.0; + x2 = 2.0 * next_double(brng_state) - 1.0; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller 
approach. */ + f = sqrt(-2.0 * log(r2) / r2); + /* Keep for next call */ + brng_state->gauss = f * x1; + brng_state->has_gauss = true; + return f * x2; + } +} + +float random_gauss_f(brng_t *brng_state) { + if (brng_state->has_gauss_f) { + const float temp = brng_state->gauss_f; + brng_state->has_gauss_f = false; + brng_state->gauss_f = 0.0f; + return temp; + } else { + float f, x1, x2, r2; + + do { + x1 = 2.0f * next_float(brng_state) - 1.0f; + x2 = 2.0f * next_float(brng_state) - 1.0f; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. */ + f = sqrtf(-2.0f * logf(r2) / r2); + /* Keep for next call */ + brng_state->gauss_f = f * x1; + brng_state->has_gauss_f = true; + return f * x2; + } +} +#endif + +static NPY_INLINE double standard_exponential_zig(brng_t *brng_state); + +static double standard_exponential_zig_unlikely(brng_t *brng_state, uint8_t idx, + double x) { + if (idx == 0) { + return ziggurat_exp_r - log(next_double(brng_state)); + } else if ((fe_double[idx - 1] - fe_double[idx]) * next_double(brng_state) + + fe_double[idx] < + exp(-x)) { + return x; + } else { + return standard_exponential_zig(brng_state); + } +} + +static NPY_INLINE double standard_exponential_zig(brng_t *brng_state) { + uint64_t ri; + uint8_t idx; + double x; + ri = next_uint64(brng_state); + ri >>= 3; + idx = ri & 0xFF; + ri >>= 8; + x = ri * we_double[idx]; + if (ri < ke_double[idx]) { + return x; /* 98.9% of the time we return here 1st try */ + } + return standard_exponential_zig_unlikely(brng_state, idx, x); +} + +double random_standard_exponential_zig(brng_t *brng_state) { + return standard_exponential_zig(brng_state); +} + +void random_standard_exponential_zig_fill(brng_t *brng_state, npy_intp cnt, + double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = standard_exponential_zig(brng_state); + } +} + +static NPY_INLINE float standard_exponential_zig_f(brng_t *brng_state); + +static 
float standard_exponential_zig_unlikely_f(brng_t *brng_state, + uint8_t idx, float x) { + if (idx == 0) { + return ziggurat_exp_r_f - logf(next_float(brng_state)); + } else if ((fe_float[idx - 1] - fe_float[idx]) * next_float(brng_state) + + fe_float[idx] < + expf(-x)) { + return x; + } else { + return standard_exponential_zig_f(brng_state); + } +} + +static NPY_INLINE float standard_exponential_zig_f(brng_t *brng_state) { + uint32_t ri; + uint8_t idx; + float x; + ri = next_uint32(brng_state); + ri >>= 1; + idx = ri & 0xFF; + ri >>= 8; + x = ri * we_float[idx]; + if (ri < ke_float[idx]) { + return x; /* 98.9% of the time we return here 1st try */ + } + return standard_exponential_zig_unlikely_f(brng_state, idx, x); +} + +float random_standard_exponential_zig_f(brng_t *brng_state) { + return standard_exponential_zig_f(brng_state); +} + +static NPY_INLINE double next_gauss_zig(brng_t *brng_state) { + uint64_t r; + int sign; + int64_t rabs; + int idx; + double x, xx, yy; + for (;;) { + /* r = e3n52sb8 */ + r = next_uint64(brng_state); + idx = r & 0xff; + r >>= 8; + sign = r & 0x1; + rabs = (int64_t)((r >> 1) & 0x000fffffffffffff); + x = rabs * wi_double[idx]; + if (sign & 0x1) + x = -x; + if (rabs < ki_double[idx]) + return x; /* 99.3% of the time return here */ + if (idx == 0) { + for (;;) { + xx = -ziggurat_nor_inv_r * log(next_double(brng_state)); + yy = -log(next_double(brng_state)); + if (yy + yy > xx * xx) + return ((rabs >> 8) & 0x1) ? 
-(ziggurat_nor_r + xx) + : ziggurat_nor_r + xx; + } + } else { + if (((fi_double[idx - 1] - fi_double[idx]) * next_double(brng_state) + + fi_double[idx]) < exp(-0.5 * x * x)) + return x; + } + } +} + +double random_gauss_zig(brng_t *brng_state) { + return next_gauss_zig(brng_state); +} + +void random_gauss_zig_fill(brng_t *brng_state, npy_intp cnt, double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_gauss_zig(brng_state); + } +} + +float random_gauss_zig_f(brng_t *brng_state) { + uint32_t r; + int sign; + int32_t rabs; + int idx; + float x, xx, yy; + for (;;) { + /* r = n23sb8 */ + r = next_uint32(brng_state); + idx = r & 0xff; + sign = (r >> 8) & 0x1; + rabs = (int32_t)((r >> 9) & 0x0007fffff); + x = rabs * wi_float[idx]; + if (sign & 0x1) + x = -x; + if (rabs < ki_float[idx]) + return x; /* # 99.3% of the time return here */ + if (idx == 0) { + for (;;) { + xx = -ziggurat_nor_inv_r_f * logf(next_float(brng_state)); + yy = -logf(next_float(brng_state)); + if (yy + yy > xx * xx) + return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r_f + xx) + : ziggurat_nor_r_f + xx; + } + } else { + if (((fi_float[idx - 1] - fi_float[idx]) * next_float(brng_state) + + fi_float[idx]) < exp(-0.5 * x * x)) + return x; + } + } +} + +/* +static NPY_INLINE double standard_gamma(brng_t *brng_state, double shape) { + double b, c; + double U, V, X, Y; + + if (shape == 1.0) { + return random_standard_exponential(brng_state); + } else if (shape < 1.0) { + for (;;) { + U = next_double(brng_state); + V = random_standard_exponential(brng_state); + if (U <= 1.0 - shape) { + X = pow(U, 1. / shape); + if (X <= V) { + return X; + } + } else { + Y = -log((1 - U) / shape); + X = pow(1.0 - shape + shape * Y, 1. / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1. / 3.; + c = 1. 
/ sqrt(9 * b); + for (;;) { + do { + X = random_gauss(brng_state); + V = 1.0 + c * X; + } while (V <= 0.0); + + V = V * V * V; + U = next_double(brng_state); + if (U < 1.0 - 0.0331 * (X * X) * (X * X)) + return (b * V); + if (log(U) < 0.5 * X * X + b * (1. - V + log(V))) + return (b * V); + } + } +} + +static NPY_INLINE float standard_gamma_float(brng_t *brng_state, float shape) { + float b, c; + float U, V, X, Y; + + if (shape == 1.0f) { + return random_standard_exponential_f(brng_state); + } else if (shape < 1.0f) { + for (;;) { + U = next_float(brng_state); + V = random_standard_exponential_f(brng_state); + if (U <= 1.0f - shape) { + X = powf(U, 1.0f / shape); + if (X <= V) { + return X; + } + } else { + Y = -logf((1.0f - U) / shape); + X = powf(1.0f - shape + shape * Y, 1.0f / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1.0f / 3.0f; + c = 1.0f / sqrtf(9.0f * b); + for (;;) { + do { + X = random_gauss_f(brng_state); + V = 1.0f + c * X; + } while (V <= 0.0f); + + V = V * V * V; + U = next_float(brng_state); + if (U < 1.0f - 0.0331f * (X * X) * (X * X)) + return (b * V); + if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V))) + return (b * V); + } + } +} + + +double random_standard_gamma(brng_t *brng_state, double shape) { + return standard_gamma(brng_state, shape); +} + +float random_standard_gamma_f(brng_t *brng_state, float shape) { + return standard_gamma_float(brng_state, shape); +} +*/ + +static NPY_INLINE double standard_gamma_zig(brng_t *brng_state, double shape) { + double b, c; + double U, V, X, Y; + + if (shape == 1.0) { + return random_standard_exponential_zig(brng_state); + } else if (shape == 0.0) { + return 0.0; + } else if (shape < 1.0) { + for (;;) { + U = next_double(brng_state); + V = random_standard_exponential_zig(brng_state); + if (U <= 1.0 - shape) { + X = pow(U, 1. / shape); + if (X <= V) { + return X; + } + } else { + Y = -log((1 - U) / shape); + X = pow(1.0 - shape + shape * Y, 1. 
/ shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1. / 3.; + c = 1. / sqrt(9 * b); + for (;;) { + do { + X = random_gauss_zig(brng_state); + V = 1.0 + c * X; + } while (V <= 0.0); + + V = V * V * V; + U = next_double(brng_state); + if (U < 1.0 - 0.0331 * (X * X) * (X * X)) + return (b * V); + if (log(U) < 0.5 * X * X + b * (1. - V + log(V))) + return (b * V); + } + } +} + +static NPY_INLINE float standard_gamma_zig_f(brng_t *brng_state, float shape) { + float b, c; + float U, V, X, Y; + + if (shape == 1.0f) { + return random_standard_exponential_zig_f(brng_state); + } else if (shape == 0.0) { + return 0.0; + } else if (shape < 1.0f) { + for (;;) { + U = next_float(brng_state); + V = random_standard_exponential_zig_f(brng_state); + if (U <= 1.0f - shape) { + X = powf(U, 1.0f / shape); + if (X <= V) { + return X; + } + } else { + Y = -logf((1.0f - U) / shape); + X = powf(1.0f - shape + shape * Y, 1.0f / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1.0f / 3.0f; + c = 1.0f / sqrtf(9.0f * b); + for (;;) { + do { + X = random_gauss_zig_f(brng_state); + V = 1.0f + c * X; + } while (V <= 0.0f); + + V = V * V * V; + U = next_float(brng_state); + if (U < 1.0f - 0.0331f * (X * X) * (X * X)) + return (b * V); + if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V))) + return (b * V); + } + } +} + +double random_standard_gamma_zig(brng_t *brng_state, double shape) { + return standard_gamma_zig(brng_state, shape); +} + +float random_standard_gamma_zig_f(brng_t *brng_state, float shape) { + return standard_gamma_zig_f(brng_state, shape); +} + +int64_t random_positive_int64(brng_t *brng_state) { + return next_uint64(brng_state) >> 1; +} + +int32_t random_positive_int32(brng_t *brng_state) { + return next_uint32(brng_state) >> 1; +} + +int64_t random_positive_int(brng_t *brng_state) { +#if ULONG_MAX <= 0xffffffffUL + return (int64_t)(next_uint32(brng_state) >> 1); +#else + return (int64_t)(next_uint64(brng_state) >> 
1); +#endif +} + +uint64_t random_uint(brng_t *brng_state) { +#if ULONG_MAX <= 0xffffffffUL + return next_uint32(brng_state); +#else + return next_uint64(brng_state); +#endif +} + +/* + * log-gamma function to support some of these distributions. The + * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their + * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc. + */ +static double loggam(double x) { + double x0, x2, xp, gl, gl0; + int64_t k, n; + + static double a[10] = {8.333333333333333e-02, -2.777777777777778e-03, + 7.936507936507937e-04, -5.952380952380952e-04, + 8.417508417508418e-04, -1.917526917526918e-03, + 6.410256410256410e-03, -2.955065359477124e-02, + 1.796443723688307e-01, -1.39243221690590e+00}; + x0 = x; + n = 0; + if ((x == 1.0) || (x == 2.0)) { + return 0.0; + } else if (x <= 7.0) { + n = (int64_t)(7 - x); + x0 = x + n; + } + x2 = 1.0 / (x0 * x0); + xp = 2 * M_PI; + gl0 = a[9]; + for (k = 8; k >= 0; k--) { + gl0 *= x2; + gl0 += a[k]; + } + gl = gl0 / x0 + 0.5 * log(xp) + (x0 - 0.5) * log(x0) - x0; + if (x <= 7.0) { + for (k = 1; k <= n; k++) { + gl -= log(x0 - 1.0); + x0 -= 1.0; + } + } + return gl; +} + +/* +double random_normal(brng_t *brng_state, double loc, double scale) { + return loc + scale * random_gauss(brng_state); +} +*/ + +double random_normal_zig(brng_t *brng_state, double loc, double scale) { + return loc + scale * random_gauss_zig(brng_state); +} + +double random_exponential(brng_t *brng_state, double scale) { + return scale * standard_exponential_zig(brng_state); +} + +double random_uniform(brng_t *brng_state, double lower, double range) { + return lower + range * next_double(brng_state); +} + +double random_gamma(brng_t *brng_state, double shape, double scale) { + return scale * random_standard_gamma_zig(brng_state, shape); +} + +float random_gamma_float(brng_t *brng_state, float shape, float scale) { + return scale * random_standard_gamma_zig_f(brng_state, shape); +} + +double 
random_beta(brng_t *brng_state, double a, double b) {
  /*
   * Beta(a, b) deviate. When both parameters are <= 1 Johnk's algorithm is
   * used; otherwise the ratio Ga / (Ga + Gb) of two standard gamma deviates.
   */
  double u, v, x, y, total;
  double log_x, log_y, log_max;
  double Ga, Gb;

  if (!((a <= 1.0) && (b <= 1.0))) {
    Ga = random_standard_gamma_zig(brng_state, a);
    Gb = random_standard_gamma_zig(brng_state, b);
    return Ga / (Ga + Gb);
  }

  /* Use Johnk's algorithm */
  for (;;) {
    u = next_double(brng_state);
    v = next_double(brng_state);
    x = pow(u, 1.0 / a);
    y = pow(v, 1.0 / b);
    total = x + y;

    if (!(total <= 1.0)) {
      continue;
    }
    if (total > 0) {
      return x / total;
    }

    /* Both powers underflowed to zero: redo the ratio in log space,
     * normalising by the larger logarithm. */
    log_x = log(u) / a;
    log_y = log(v) / b;
    log_max = log_x > log_y ? log_x : log_y;
    log_x -= log_max;
    log_y -= log_max;

    return exp(log_x - log(exp(log_x) + exp(log_y)));
  }
}

/* Chi-square deviate with `df` degrees of freedom: 2 * Gamma(df / 2). */
double random_chisquare(brng_t *brng_state, double df) {
  const double half_df = df / 2.0;
  return 2.0 * random_standard_gamma_zig(brng_state, half_df);
}

/*
 * F deviate as a ratio of two scaled chi-square deviates.
 * NOTE(review): both draws happen inside one expression, so C leaves their
 * relative order unspecified; the expression is kept as-is so the generated
 * stream does not change.
 */
double random_f(brng_t *brng_state, double dfnum, double dfden) {
  return ((random_chisquare(brng_state, dfnum) * dfden) /
          (random_chisquare(brng_state, dfden) * dfnum));
}

/*
 * Standard Cauchy deviate: the quotient of two standard normal deviates.
 * NOTE(review): the order of the two draws is unspecified in C; kept as-is.
 */
double random_standard_cauchy(brng_t *brng_state) {
  return random_gauss_zig(brng_state) / random_gauss_zig(brng_state);
}

/* Pareto deviate with shape `a`: exp(E / a) - 1 for a standard exponential E. */
double random_pareto(brng_t *brng_state, double a) {
  const double e = standard_exponential_zig(brng_state);
  return exp(e / a) - 1;
}

/* Weibull deviate with shape `a`: E^(1/a); returns 0 when a == 0. */
double random_weibull(brng_t *brng_state, double a) {
  double e;
  if (a == 0.0) {
    return 0.0;
  }
  e = standard_exponential_zig(brng_state);
  return pow(e, 1. / a);
}

double random_power(brng_t *brng_state, double a) {
  return pow(1 - exp(-standard_exponential_zig(brng_state)), 1.
/ a); +} + +double random_laplace(brng_t *brng_state, double loc, double scale) { + double U; + + U = next_double(brng_state); + if (U < 0.5) { + U = loc + scale * log(U + U); + } else { + U = loc - scale * log(2.0 - U - U); + } + return U; +} + +double random_gumbel(brng_t *brng_state, double loc, double scale) { + double U; + + U = 1.0 - next_double(brng_state); + return loc - scale * log(-log(U)); +} + +double random_logistic(brng_t *brng_state, double loc, double scale) { + double U; + + U = next_double(brng_state); + return loc + scale * log(U / (1.0 - U)); +} + +double random_lognormal(brng_t *brng_state, double mean, double sigma) { + return exp(random_normal_zig(brng_state, mean, sigma)); +} + +double random_rayleigh(brng_t *brng_state, double mode) { + return mode * sqrt(-2.0 * log(1.0 - next_double(brng_state))); +} + +double random_standard_t(brng_t *brng_state, double df) { + double num, denom; + + num = random_gauss_zig(brng_state); + denom = random_standard_gamma_zig(brng_state, df / 2); + return sqrt(df / 2) * num / sqrt(denom); +} + +static int64_t random_poisson_mult(brng_t *brng_state, double lam) { + int64_t X; + double prod, U, enlam; + + enlam = exp(-lam); + X = 0; + prod = 1.0; + while (1) { + U = next_double(brng_state); + prod *= U; + if (prod > enlam) { + X += 1; + } else { + return X; + } + } +} + +/* + * The transformed rejection method for generating Poisson random variables + * W. 
Hoermann + * Insurance: Mathematics and Economics 12, 39-45 (1993) + */ +#define LS2PI 0.91893853320467267 +#define TWELFTH 0.083333333333333333333333 +static int64_t random_poisson_ptrs(brng_t *brng_state, double lam) { + int64_t k; + double U, V, slam, loglam, a, b, invalpha, vr, us; + + slam = sqrt(lam); + loglam = log(lam); + b = 0.931 + 2.53 * slam; + a = -0.059 + 0.02483 * b; + invalpha = 1.1239 + 1.1328 / (b - 3.4); + vr = 0.9277 - 3.6224 / (b - 2); + + while (1) { + U = next_double(brng_state) - 0.5; + V = next_double(brng_state); + us = 0.5 - fabs(U); + k = (int64_t)floor((2 * a / us + b) * U + lam + 0.43); + if ((us >= 0.07) && (V <= vr)) { + return k; + } + if ((k < 0) || ((us < 0.013) && (V > us))) { + continue; + } + if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <= + (-lam + k * loglam - loggam(k + 1))) { + return k; + } + } +} + +int64_t random_poisson(brng_t *brng_state, double lam) { + if (lam >= 10) { + return random_poisson_ptrs(brng_state, lam); + } else if (lam == 0) { + return 0; + } else { + return random_poisson_mult(brng_state, lam); + } +} + +int64_t random_negative_binomial(brng_t *brng_state, double n, double p) { + double Y = random_gamma(brng_state, n, (1 - p) / p); + return random_poisson(brng_state, Y); +} + +int64_t random_binomial_btpe(brng_t *brng_state, int64_t n, double p, + binomial_t *binomial) { + double r, q, fm, p1, xm, xl, xr, c, laml, lamr, p2, p3, p4; + double a, u, v, s, F, rho, t, A, nrq, x1, x2, f1, f2, z, z2, w, w2, x; + int64_t m, y, k, i; + + if (!(binomial->has_binomial) || (binomial->nsave != n) || + (binomial->psave != p)) { + /* initialize */ + binomial->nsave = n; + binomial->psave = p; + binomial->has_binomial = 1; + binomial->r = r = MIN(p, 1.0 - p); + binomial->q = q = 1.0 - r; + binomial->fm = fm = n * r + r; + binomial->m = m = (int64_t)floor(binomial->fm); + binomial->p1 = p1 = floor(2.195 * sqrt(n * r * q) - 4.6 * q) + 0.5; + binomial->xm = xm = m + 0.5; + binomial->xl = xl = xm - p1; + 
/* (continuation of the one-time setup in random_binomial_btpe) */
    binomial->xr = xr = xm + p1;
    binomial->c = c = 0.134 + 20.5 / (15.3 + m);
    a = (fm - xl) / (fm - xl * r);
    binomial->laml = laml = a * (1.0 + a / 2.0);
    a = (xr - fm) / (xr * q);
    binomial->lamr = lamr = a * (1.0 + a / 2.0);
    /* p1 < p2 < p3 < p4 are the cut points that Step10..Step40 compare the
     * scaled uniform `u` against to choose a sampling region. */
    binomial->p2 = p2 = p1 * (1.0 + 2.0 * c);
    binomial->p3 = p3 = p2 + c / laml;
    binomial->p4 = p4 = p3 + c / lamr;
  } else {
    /* Same (n, p) as the previous call: reuse the cached setup values. */
    r = binomial->r;
    q = binomial->q;
    fm = binomial->fm;
    m = binomial->m;
    p1 = binomial->p1;
    xm = binomial->xm;
    xl = binomial->xl;
    xr = binomial->xr;
    c = binomial->c;
    laml = binomial->laml;
    lamr = binomial->lamr;
    p2 = binomial->p2;
    p3 = binomial->p3;
    p4 = binomial->p4;
  }

/* sigh ... */
/* Step10: draw u in [0, p4) and v in [0, 1). If u <= p1 the candidate is
 * accepted immediately (jump to Step60). Every rejection returns here. */
Step10:
  nrq = n * r * q;
  u = next_double(brng_state) * p4;
  v = next_double(brng_state);
  if (u > p1)
    goto Step20;
  y = (int64_t)floor(xm - p1 * v + u);
  goto Step60;

/* Step20: region p1 < u <= p2; v is rescaled and the candidate is rejected
 * outright when the rescaled v exceeds 1. */
Step20:
  if (u > p2)
    goto Step30;
  x = xl + (u - p1) / c;
  v = v * c + 1.0 - fabs(m - x + 0.5) / p1;
  if (v > 1.0)
    goto Step10;
  y = (int64_t)floor(x);
  goto Step50;

/* Step30: region p2 < u <= p3; candidate to the left of xl, rejected when it
 * falls below 0. */
Step30:
  if (u > p3)
    goto Step40;
  y = (int64_t)floor(xl + log(v) / laml);
  if (y < 0)
    goto Step10;
  v = v * (u - p2) * laml;
  goto Step50;

/* Step40: region u > p3; candidate to the right of xr, rejected when it
 * exceeds n. */
Step40:
  y = (int64_t)floor(xr - log(v) / lamr);
  if (y > n)
    goto Step10;
  v = v * (u - p3) * lamr;

/* Step50: acceptance test. When k = |y - m| is <= 20 or >= nrq / 2 - 1 the
 * ratio F is built term by term between m and y; otherwise Step52 is used. */
Step50:
  k = llabs(y - m);
  if ((k > 20) && (k < ((nrq) / 2.0 - 1)))
    goto Step52;

  s = r / q;
  a = s * (n + 1);
  F = 1.0;
  if (m < y) {
    for (i = m + 1; i <= y; i++) {
      F *= (a / i - s);
    }
  } else if (m > y) {
    for (i = y + 1; i <= m; i++) {
      F /= (a / i - s);
    }
  }
  if (v > F)
    goto Step10;
  goto Step60;

/* Step52: squeeze on A = log(v): accept below t - rho, reject above t + rho,
 * and fall through to the full logarithmic comparison in between. */
Step52:
  rho =
      (k / (nrq)) * ((k * (k / 3.0 + 0.625) + 0.16666666666666666) / nrq + 0.5);
  t = -k * k / (2 * nrq);
  A = log(v);
  if (A < (t - rho))
    goto Step60;
  if (A > (t + rho))
    goto Step10;

  x1 = y + 1;
  f1 = m + 1;
  z = n + 1 - m;
  w = n - y + 1;
  x2 = x1 * x1;
  f2 = f1 * f1;
  z2 = z * z;
  w2 = w * w;
  if (A > (xm * log(f1 / x1) + (n - m +
0.5) * log(z / w) + + (y - m) * log(w * r / (x1 * q)) + + (13680. - (462. - (132. - (99. - 140. / f2) / f2) / f2) / f2) / f1 / + 166320. + + (13680. - (462. - (132. - (99. - 140. / z2) / z2) / z2) / z2) / z / + 166320. + + (13680. - (462. - (132. - (99. - 140. / x2) / x2) / x2) / x2) / x1 / + 166320. + + (13680. - (462. - (132. - (99. - 140. / w2) / w2) / w2) / w2) / w / + 166320.)) { + goto Step10; + } + +Step60: + if (p > 0.5) { + y = n - y; + } + + return y; +} + +int64_t random_binomial_inversion(brng_t *brng_state, int64_t n, double p, + binomial_t *binomial) { + double q, qn, np, px, U; + int64_t X, bound; + + if (!(binomial->has_binomial) || (binomial->nsave != n) || + (binomial->psave != p)) { + binomial->nsave = n; + binomial->psave = p; + binomial->has_binomial = 1; + binomial->q = q = 1.0 - p; + binomial->r = qn = exp(n * log(q)); + binomial->c = np = n * p; + binomial->m = bound = (int64_t)MIN(n, np + 10.0 * sqrt(np * q + 1)); + } else { + q = binomial->q; + qn = binomial->r; + np = binomial->c; + bound = binomial->m; + } + X = 0; + px = qn; + U = next_double(brng_state); + while (U > px) { + X++; + if (X > bound) { + X = 0; + px = qn; + U = next_double(brng_state); + } else { + U -= px; + px = ((n - X + 1) * p * px) / (X * q); + } + } + return X; +} + +int64_t random_binomial(brng_t *brng_state, double p, int64_t n, + binomial_t *binomial) { + double q; + + if ((n == 0LL) || (p == 0.0f)) + return 0; + + if (p <= 0.5) { + if (p * n <= 30.0) { + return random_binomial_inversion(brng_state, n, p, binomial); + } else { + return random_binomial_btpe(brng_state, n, p, binomial); + } + } else { + q = 1.0 - p; + if (q * n <= 30.0) { + return n - random_binomial_inversion(brng_state, n, q, binomial); + } else { + return n - random_binomial_btpe(brng_state, n, q, binomial); + } + } +} + +double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) { + if (npy_isnan(nonc)){ + return NPY_NAN; + } + if (nonc == 0) { + return 
random_chisquare(brng_state, df); + } + if (1 < df) { + const double Chi2 = random_chisquare(brng_state, df - 1); + const double n = random_gauss_zig(brng_state) + sqrt(nonc); + return Chi2 + n * n; + } else { + const int64_t i = random_poisson(brng_state, nonc / 2.0); + return random_chisquare(brng_state, df + 2 * i); + } +} + +double random_noncentral_f(brng_t *brng_state, double dfnum, double dfden, + double nonc) { + double t = random_noncentral_chisquare(brng_state, dfnum, nonc) * dfden; + return t / (random_chisquare(brng_state, dfden) * dfnum); +} + +double random_wald(brng_t *brng_state, double mean, double scale) { + double U, X, Y; + double mu_2l; + + mu_2l = mean / (2 * scale); + Y = random_gauss_zig(brng_state); + Y = mean * Y * Y; + X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y)); + U = next_double(brng_state); + if (U <= mean / (mean + X)) { + return X; + } else { + return mean * mean / X; + } +} + +double random_vonmises(brng_t *brng_state, double mu, double kappa) { + double s; + double U, V, W, Y, Z; + double result, mod; + int neg; + if (npy_isnan(kappa)){ + return NPY_NAN; + } + if (kappa < 1e-8) { + return M_PI * (2 * next_double(brng_state) - 1); + } else { + /* with double precision rho is zero until 1.4e-8 */ + if (kappa < 1e-5) { + /* + * second order taylor expansion around kappa = 0 + * precise until relatively large kappas as second order is 0 + */ + s = (1. 
/ kappa + kappa); + } else { + double r = 1 + sqrt(1 + 4 * kappa * kappa); + double rho = (r - sqrt(2 * r)) / (2 * kappa); + s = (1 + rho * rho) / (2 * rho); + } + + while (1) { + U = next_double(brng_state); + Z = cos(M_PI * U); + W = (1 + s * Z) / (s + Z); + Y = kappa * (s - W); + V = next_double(brng_state); + if ((Y * (2 - Y) - V >= 0) || (log(Y / V) + 1 - Y >= 0)) { + break; + } + } + + U = next_double(brng_state); + + result = acos(W); + if (U < 0.5) { + result = -result; + } + result += mu; + neg = (result < 0); + mod = fabs(result); + mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI); + if (neg) { + mod *= -1; + } + + return mod; + } +} + +int64_t random_logseries(brng_t *brng_state, double p) { + double q, r, U, V; + int64_t result; + + r = log(1.0 - p); + + while (1) { + V = next_double(brng_state); + if (V >= p) { + return 1; + } + U = next_double(brng_state); + q = 1.0 - exp(r * U); + if (V <= q * q) { + result = (int64_t)floor(1 + log(V) / log(q)); + if (result < 1) { + continue; + } else { + return result; + } + } + if (V >= q) { + return 1; + } + return 2; + } +} + +int64_t random_geometric_search(brng_t *brng_state, double p) { + double U; + int64_t X; + double sum, prod, q; + + X = 1; + sum = prod = p; + q = 1.0 - p; + U = next_double(brng_state); + while (U > sum) { + prod *= q; + sum += prod; + X++; + } + return X; +} + +int64_t random_geometric_inversion(brng_t *brng_state, double p) { + return (int64_t)ceil(log(1.0 - next_double(brng_state)) / log(1.0 - p)); +} + +int64_t random_geometric(brng_t *brng_state, double p) { + if (p >= 0.333333333333333333333333) { + return random_geometric_search(brng_state, p); + } else { + return random_geometric_inversion(brng_state, p); + } +} + +int64_t random_zipf(brng_t *brng_state, double a) { + double T, U, V; + int64_t X; + double am1, b; + + am1 = a - 1.0; + b = pow(2.0, am1); + do { + U = 1.0 - next_double(brng_state); + V = next_double(brng_state); + X = (int64_t)floor(pow(U, -1.0 / am1)); + /* The real result 
may be above what can be represented in a int64. + * It will get casted to -sys.maxint-1. Since this is + * a straightforward rejection algorithm, we can just reject this value + * in the rejection condition below. This function then models a Zipf + * distribution truncated to sys.maxint. + */ + T = pow(1.0 + 1.0 / X, am1); + } while (((V * X * (T - 1.0) / (b - 1.0)) > (T / b)) || X < 1); + return X; +} + +double random_triangular(brng_t *brng_state, double left, double mode, + double right) { + double base, leftbase, ratio, leftprod, rightprod; + double U; + + base = right - left; + leftbase = mode - left; + ratio = leftbase / base; + leftprod = leftbase * base; + rightprod = (right - mode) * base; + + U = next_double(brng_state); + if (U <= ratio) { + return left + sqrt(U * leftprod); + } else { + return right - sqrt((1.0 - U) * rightprod); + } +} + +int64_t random_hypergeometric_hyp(brng_t *brng_state, int64_t good, int64_t bad, + int64_t sample) { + int64_t d1, k, z; + double d2, u, y; + + d1 = bad + good - sample; + d2 = (double)MIN(bad, good); + + y = d2; + k = sample; + while (y > 0.0) { + u = next_double(brng_state); + y -= (int64_t)floor(u + y / (d1 + k)); + k--; + if (k == 0) + break; + } + z = (int64_t)(d2 - y); + if (good > bad) + z = sample - z; + return z; +} + +/* D1 = 2*sqrt(2/e) */ +/* D2 = 3 - 2*sqrt(3/e) */ +#define D1 1.7155277699214135 +#define D2 0.8989161620588988 +int64_t random_hypergeometric_hrua(brng_t *brng_state, int64_t good, + int64_t bad, int64_t sample) { + int64_t mingoodbad, maxgoodbad, popsize, m, d9; + double d4, d5, d6, d7, d8, d10, d11; + int64_t Z; + double T, W, X, Y; + + mingoodbad = MIN(good, bad); + popsize = good + bad; + maxgoodbad = MAX(good, bad); + m = MIN(sample, popsize - sample); + d4 = ((double)mingoodbad) / popsize; + d5 = 1.0 - d4; + d6 = m * d4 + 0.5; + d7 = sqrt((double)(popsize - m) * sample * d4 * d5 / (popsize - 1) + 0.5); + d8 = D1 * d7 + D2; + d9 = (int64_t)floor((double)(m + 1) * (mingoodbad + 1) / 
(popsize + 2)); + d10 = (loggam(d9 + 1) + loggam(mingoodbad - d9 + 1) + loggam(m - d9 + 1) + + loggam(maxgoodbad - m + d9 + 1)); + d11 = MIN(MIN(m, mingoodbad) + 1.0, floor(d6 + 16 * d7)); + /* 16 for 16-decimal-digit precision in D1 and D2 */ + + while (1) { + X = next_double(brng_state); + Y = next_double(brng_state); + W = d6 + d8 * (Y - 0.5) / X; + + /* fast rejection: */ + if ((W < 0.0) || (W >= d11)) + continue; + + Z = (int64_t)floor(W); + T = d10 - (loggam(Z + 1) + loggam(mingoodbad - Z + 1) + loggam(m - Z + 1) + + loggam(maxgoodbad - m + Z + 1)); + + /* fast acceptance: */ + if ((X * (4.0 - X) - 3.0) <= T) + break; + + /* fast rejection: */ + if (X * (X - T) >= 1) + continue; + + if (2.0 * log(X) <= T) + break; /* acceptance */ + } + + /* this is a correction to HRUA* by Ivan Frohne in rv.py */ + if (good > bad) + Z = m - Z; + + /* another fix from rv.py to allow sample to exceed popsize/2 */ + if (m < sample) + Z = good - Z; + + return Z; +} +#undef D1 +#undef D2 + +int64_t random_hypergeometric(brng_t *brng_state, int64_t good, int64_t bad, + int64_t sample) { + if (sample > 10) { + return random_hypergeometric_hrua(brng_state, good, bad, sample); + } else { + return random_hypergeometric_hyp(brng_state, good, bad, sample); + } +} + +uint64_t random_interval(brng_t *brng_state, uint64_t max) { + uint64_t mask, value; + if (max == 0) { + return 0; + } + + mask = max; + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + + /* Search a random value in [0..mask] <= max */ + if (max <= 0xffffffffUL) { + while ((value = (next_uint32(brng_state) & mask)) > max) + ; + } else { + while ((value = (next_uint64(brng_state) & mask)) > max) + ; + } + return value; +} + +static NPY_INLINE uint64_t gen_mask(uint64_t max) { + uint64_t mask = max; + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= 
mask >> 32; + return mask; +} + +/* Generate 16 bit random numbers using a 32 bit buffer. */ +static NPY_INLINE uint16_t buffered_uint16(brng_t *brng_state, int *bcnt, + uint32_t *buf) { + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 1; + } else { + buf[0] >>= 16; + bcnt[0] -= 1; + } + + return (uint16_t)buf[0]; +} + +/* Generate 8 bit random numbers using a 32 bit buffer. */ +static NPY_INLINE uint8_t buffered_uint8(brng_t *brng_state, int *bcnt, + uint32_t *buf) { + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 3; + } else { + buf[0] >>= 8; + bcnt[0] -= 1; + } + + return (uint8_t)buf[0]; +} + +/* Static `masked rejection` function called by random_bounded_uint64(...) */ +static NPY_INLINE uint64_t bounded_masked_uint64(brng_t *brng_state, + uint64_t rng, uint64_t mask) { + uint64_t val; + + while ((val = (next_uint64(brng_state) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint32(...) */ +static NPY_INLINE uint32_t buffered_bounded_masked_uint32( + brng_t *brng_state, uint32_t rng, uint32_t mask, int *bcnt, uint32_t *buf) { + /* + * The buffer and buffer count are not used here but are included to allow + * this function to be templated with the similar uint8 and uint16 + * functions + */ + + uint32_t val; + + while ((val = (next_uint32(brng_state) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint16(...) */ +static NPY_INLINE uint16_t buffered_bounded_masked_uint16( + brng_t *brng_state, uint16_t rng, uint16_t mask, int *bcnt, uint32_t *buf) { + uint16_t val; + + while ((val = (buffered_uint16(brng_state, bcnt, buf) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint8(...) 
*/ +static NPY_INLINE uint8_t buffered_bounded_masked_uint8(brng_t *brng_state, + uint8_t rng, + uint8_t mask, int *bcnt, + uint32_t *buf) { + uint8_t val; + + while ((val = (buffered_uint8(brng_state, bcnt, buf) & mask)) > rng) + ; + + return val; +} + +/* Static `Lemire rejection` function called by random_bounded_uint64(...) */ +static NPY_INLINE uint64_t bounded_lemire_uint64(brng_t *brng_state, + uint64_t rng) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFFFFFFFFFFFFFFFF. When this happens `rng_excl` + * becomes zero. + */ + const uint64_t rng_excl = rng + 1; + +#if __SIZEOF_INT128__ + /* 128-bit uint available (e.g. GCC/clang). `m` is the __uint128_t scaled + * integer. */ + __uint128_t m; + uint64_t leftover; + + /* Generate a scaled random number. */ + m = ((__uint128_t)next_uint64(brng_state)) * rng_excl; + + /* Rejection sampling to remove any bias. */ + leftover = m & 0xFFFFFFFFFFFFFFFFULL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + + const uint64_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) % + * rng_excl; */ + + while (leftover < threshold) { + m = ((__uint128_t)next_uint64(brng_state)) * rng_excl; + leftover = m & 0xFFFFFFFFFFFFFFFFULL; + } + } + + return (m >> 64); +#else + /* 128-bit uint NOT available (e.g. MSVS). `m1` is the upper 64-bits of the + * scaled integer. */ + uint64_t m1; + uint64_t x; + uint64_t leftover; + + x = next_uint64(brng_state); + + /* Rejection sampling to remove any bias. */ + leftover = x * rng_excl; /* The lower 64-bits of the mult. */ + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. 
*/ + + const uint64_t threshold = -rng_excl % rng_excl; + /* Same as:threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) % + * rng_excl; */ + + while (leftover < threshold) { + x = next_uint64(brng_state); + leftover = x * rng_excl; + } + } + +#if defined(_MSC_VER) && defined(_WIN64) + /* _WIN64 architecture. Use the __umulh intrinsic to calc `m1`. */ + m1 = __umulh(x, rng_excl); +#else + /* 32-bit architecture. Emulate __umulh to calc `m1`. */ + { + uint64_t x0, x1, rng_excl0, rng_excl1; + uint64_t w0, w1, w2, t; + + x0 = x & 0xFFFFFFFFULL; + x1 = x >> 32; + rng_excl0 = rng_excl & 0xFFFFFFFFULL; + rng_excl1 = rng_excl >> 32; + w0 = x0 * rng_excl0; + t = x1 * rng_excl0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 += x0 * rng_excl1; + m1 = x1 * rng_excl1 + w2 + (w1 >> 32); + } +#endif + + return m1; +#endif +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint32(...) */ +static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(brng_t *brng_state, + uint32_t rng, + int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * The buffer and buffer count are not used here but are included to allow + * this function to be templated with the similar uint8 and uint16 + * functions + * + * Note: `rng` should not be 0xFFFFFFFF. When this happens `rng_excl` becomes + * zero. + */ + const uint32_t rng_excl = rng + 1; + + uint64_t m; + uint32_t leftover; + + /* Generate a scaled random number. */ + m = ((uint64_t)next_uint32(brng_state)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFFFFFFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. 
*/ + const uint32_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint64_t)(0x100000000ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint64_t)next_uint32(brng_state)) * rng_excl; + leftover = m & 0xFFFFFFFFUL; + } + } + + return (m >> 32); +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint16(...) */ +static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(brng_t *brng_state, + uint16_t rng, + int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFFFF. When this happens `rng_excl` becomes + * zero. + */ + const uint16_t rng_excl = rng + 1; + + uint32_t m; + uint16_t leftover; + + /* Generate a scaled random number. */ + m = ((uint32_t)buffered_uint16(brng_state, bcnt, buf)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + const uint16_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint32_t)(0x10000ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint32_t)buffered_uint16(brng_state, bcnt, buf)) * rng_excl; + leftover = m & 0xFFFFUL; + } + } + + return (m >> 16); +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint8(...) */ +static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(brng_t *brng_state, + uint8_t rng, int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFF. When this happens `rng_excl` becomes + * zero. + */ + const uint8_t rng_excl = rng + 1; + + uint16_t m; + uint8_t leftover; + + /* Generate a scaled random number. 
*/ + m = ((uint16_t)buffered_uint8(brng_state, bcnt, buf)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + const uint8_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint16_t)(0x100ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint16_t)buffered_uint8(brng_state, bcnt, buf)) * rng_excl; + leftover = m & 0xFFUL; + } + } + + return (m >> 8); +} + +/* + * Returns a single random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint64_t random_bounded_uint64(brng_t *brng_state, uint64_t off, uint64_t rng, + uint64_t mask, bool use_masked) { + if (rng == 0) { + return off; + } else if (rng < 0xFFFFFFFFUL) { + /* Call 32-bit generator if range in 32-bit. */ + if (use_masked) { + return off + + buffered_bounded_masked_uint32(brng_state, rng, mask, NULL, NULL); + } else { + return off + buffered_bounded_lemire_uint32(brng_state, rng, NULL, NULL); + } + } else if (rng == 0xFFFFFFFFFFFFFFFFULL) { + /* Lemire64 doesn't support inclusive rng = 0xFFFFFFFFFFFFFFFF. */ + return off + next_uint64(brng_state); + } else { + if (use_masked) { + return off + bounded_masked_uint64(brng_state, rng, mask); + } else { + return off + bounded_lemire_uint64(brng_state, rng); + } + } +} + +/* + * Returns a single random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint32_t random_buffered_bounded_uint32(brng_t *brng_state, uint32_t off, + uint32_t rng, uint32_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + /* + * Unused bcnt and buf are here only to allow templating with other uint + * generators. + */ + if (rng == 0) { + return off; + } else if (rng == 0xFFFFFFFFUL) { + /* Lemire32 doesn't support inclusive rng = 0xFFFFFFFF. 
*/ + return off + next_uint32(brng_state); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint32(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint32(brng_state, rng, bcnt, buf); + } + } +} + +/* + * Returns a single random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint16_t random_buffered_bounded_uint16(brng_t *brng_state, uint16_t off, + uint16_t rng, uint16_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + if (rng == 0) { + return off; + } else if (rng == 0xFFFFUL) { + /* Lemire16 doesn't support inclusive rng = 0xFFFF. */ + return off + buffered_uint16(brng_state, bcnt, buf); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint16(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint16(brng_state, rng, bcnt, buf); + } + } +} + +/* + * Returns a single random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint8_t random_buffered_bounded_uint8(brng_t *brng_state, uint8_t off, + uint8_t rng, uint8_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + if (rng == 0) { + return off; + } else if (rng == 0xFFUL) { + /* Lemire8 doesn't support inclusive rng = 0xFF. 
*/ + return off + buffered_uint8(brng_state, bcnt, buf); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint8(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint8(brng_state, rng, bcnt, buf); + } + } +} + +static NPY_INLINE npy_bool buffered_bounded_bool(brng_t *brng_state, + npy_bool off, npy_bool rng, + npy_bool mask, int *bcnt, + uint32_t *buf) { + if (rng == 0) + return off; + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 31; + } else { + buf[0] >>= 1; + bcnt[0] -= 1; + } + return (buf[0] & 0x00000001UL) != 0; +} + +npy_bool random_buffered_bounded_bool(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_bool mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + return buffered_bounded_bool(brng_state, off, rng, mask, bcnt, buf); +} + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint64_fill(brng_t *brng_state, uint64_t off, uint64_t rng, + npy_intp cnt, bool use_masked, uint64_t *out) { + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng < 0xFFFFFFFFUL) { + uint32_t buf = 0; + int bcnt = 0; + + /* Call 32-bit generator if range in 32-bit. */ + if (use_masked) { + /* Smallest bit mask >= max */ + uint64_t mask = gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint32(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint32(brng_state, rng, &bcnt, &buf); + } + } + } else if (rng == 0xFFFFFFFFFFFFFFFFULL) { + /* Lemire64 doesn't support rng = 0xFFFFFFFFFFFFFFFF. 
*/ + for (i = 0; i < cnt; i++) { + out[i] = off + next_uint64(brng_state); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint64_t mask = gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + bounded_masked_uint64(brng_state, rng, mask); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = off + bounded_lemire_uint64(brng_state, rng); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint32_fill(brng_t *brng_state, uint32_t off, uint32_t rng, + npy_intp cnt, bool use_masked, uint32_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFFFFFFFUL) { + /* Lemire32 doesn't support rng = 0xFFFFFFFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + next_uint32(brng_state); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint32_t mask = (uint32_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint32(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint32(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint16_fill(brng_t *brng_state, uint16_t off, uint16_t rng, + npy_intp cnt, bool use_masked, uint16_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFFFUL) { + /* Lemire16 doesn't support rng = 0xFFFF. 
*/ + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_uint16(brng_state, &bcnt, &buf); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint16_t mask = (uint16_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint16(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint16(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint8_fill(brng_t *brng_state, uint8_t off, uint8_t rng, + npy_intp cnt, bool use_masked, uint8_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFUL) { + /* Lemire8 doesn't support rng = 0xFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_uint8(brng_state, &bcnt, &buf); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint8_t mask = (uint8_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint8(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint8(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. 
+ */ +void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng, + npy_intp cnt, bool use_masked, npy_bool *out) { + npy_bool mask = 0; + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + for (i = 0; i < cnt; i++) { + out[i] = buffered_bounded_bool(brng_state, off, rng, mask, &bcnt, &buf); + } +} diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h new file mode 100644 index 000000000..7ca31a16c --- /dev/null +++ b/numpy/random/src/distributions/distributions.h @@ -0,0 +1,220 @@ +#ifndef _RANDOMDGEN__DISTRIBUTIONS_H_ +#define _RANDOMDGEN__DISTRIBUTIONS_H_ + +#pragma once +#include <stddef.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +typedef int bool; +#define false 0 +#define true 1 +#else +#include <stdbool.h> +#include <stdint.h> +#endif +#else +#include <stdbool.h> +#include <stdint.h> +#endif + +#include "Python.h" +#include "numpy/npy_common.h" +#include "numpy/npy_math.h" + +#ifdef _WIN32 +#if _MSC_VER == 1500 + +static NPY_INLINE int64_t llabs(int64_t x) { + int64_t o; + if (x < 0) { + o = -x; + } else { + o = x; + } + return o; +} +#endif +#endif + +#ifdef DLL_EXPORT +#define DECLDIR __declspec(dllexport) +#else +#define DECLDIR extern +#endif + +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? x : y) +#define MAX(x, y) (((x) > (y)) ? 
x : y) +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338328 +#endif + +typedef struct s_binomial_t { + int has_binomial; /* !=0: following parameters initialized for binomial */ + double psave; + int64_t nsave; + double r; + double q; + double fm; + int64_t m; + double p1; + double xm; + double xl; + double xr; + double c; + double laml; + double lamr; + double p2; + double p3; + double p4; +} binomial_t; + +typedef struct brng { + void *state; + uint64_t (*next_uint64)(void *st); + uint32_t (*next_uint32)(void *st); + double (*next_double)(void *st); + uint64_t (*next_raw)(void *st); +} brng_t; + +/* Inline generators for internal use */ +static NPY_INLINE uint32_t next_uint32(brng_t *brng_state) { + return brng_state->next_uint32(brng_state->state); +} + +static NPY_INLINE uint64_t next_uint64(brng_t *brng_state) { + return brng_state->next_uint64(brng_state->state); +} + +static NPY_INLINE float next_float(brng_t *brng_state) { + return (next_uint32(brng_state) >> 9) * (1.0f / 8388608.0f); +} + +static NPY_INLINE double next_double(brng_t *brng_state) { + return brng_state->next_double(brng_state->state); +} + +DECLDIR float random_float(brng_t *brng_state); +DECLDIR double random_double(brng_t *brng_state); +DECLDIR void random_double_fill(brng_t *brng_state, npy_intp cnt, double *out); + +DECLDIR int64_t random_positive_int64(brng_t *brng_state); +DECLDIR int32_t random_positive_int32(brng_t *brng_state); +DECLDIR int64_t random_positive_int(brng_t *brng_state); +DECLDIR uint64_t random_uint(brng_t *brng_state); + +DECLDIR double random_standard_exponential(brng_t *brng_state); +DECLDIR void random_standard_exponential_fill(brng_t *brng_state, npy_intp cnt, + double *out); +DECLDIR float random_standard_exponential_f(brng_t *brng_state); +DECLDIR double random_standard_exponential_zig(brng_t *brng_state); +DECLDIR void random_standard_exponential_zig_fill(brng_t *brng_state, + npy_intp cnt, double *out); +DECLDIR float 
random_standard_exponential_zig_f(brng_t *brng_state); + +/* +DECLDIR double random_gauss(brng_t *brng_state); +DECLDIR float random_gauss_f(brng_t *brng_state); +*/ +DECLDIR double random_gauss_zig(brng_t *brng_state); +DECLDIR float random_gauss_zig_f(brng_t *brng_state); +DECLDIR void random_gauss_zig_fill(brng_t *brng_state, npy_intp cnt, + double *out); + +/* +DECLDIR double random_standard_gamma(brng_t *brng_state, double shape); +DECLDIR float random_standard_gamma_f(brng_t *brng_state, float shape); +*/ +DECLDIR double random_standard_gamma_zig(brng_t *brng_state, double shape); +DECLDIR float random_standard_gamma_zig_f(brng_t *brng_state, float shape); + +/* +DECLDIR double random_normal(brng_t *brng_state, double loc, double scale); +*/ +DECLDIR double random_normal_zig(brng_t *brng_state, double loc, double scale); + +DECLDIR double random_gamma(brng_t *brng_state, double shape, double scale); +DECLDIR float random_gamma_float(brng_t *brng_state, float shape, float scale); + +DECLDIR double random_exponential(brng_t *brng_state, double scale); +DECLDIR double random_uniform(brng_t *brng_state, double lower, double range); +DECLDIR double random_beta(brng_t *brng_state, double a, double b); +DECLDIR double random_chisquare(brng_t *brng_state, double df); +DECLDIR double random_f(brng_t *brng_state, double dfnum, double dfden); +DECLDIR double random_standard_cauchy(brng_t *brng_state); +DECLDIR double random_pareto(brng_t *brng_state, double a); +DECLDIR double random_weibull(brng_t *brng_state, double a); +DECLDIR double random_power(brng_t *brng_state, double a); +DECLDIR double random_laplace(brng_t *brng_state, double loc, double scale); +DECLDIR double random_gumbel(brng_t *brng_state, double loc, double scale); +DECLDIR double random_logistic(brng_t *brng_state, double loc, double scale); +DECLDIR double random_lognormal(brng_t *brng_state, double mean, double sigma); +DECLDIR double random_rayleigh(brng_t *brng_state, double mode); +DECLDIR double 
random_standard_t(brng_t *brng_state, double df); +DECLDIR double random_noncentral_chisquare(brng_t *brng_state, double df, + double nonc); +DECLDIR double random_noncentral_f(brng_t *brng_state, double dfnum, + double dfden, double nonc); +DECLDIR double random_wald(brng_t *brng_state, double mean, double scale); +DECLDIR double random_vonmises(brng_t *brng_state, double mu, double kappa); +DECLDIR double random_triangular(brng_t *brng_state, double left, double mode, + double right); + +DECLDIR int64_t random_poisson(brng_t *brng_state, double lam); +DECLDIR int64_t random_negative_binomial(brng_t *brng_state, double n, + double p); +DECLDIR int64_t random_binomial(brng_t *brng_state, double p, int64_t n, + binomial_t *binomial); +DECLDIR int64_t random_logseries(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric_search(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric_inversion(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric(brng_t *brng_state, double p); +DECLDIR int64_t random_zipf(brng_t *brng_state, double a); +DECLDIR int64_t random_hypergeometric(brng_t *brng_state, int64_t good, + int64_t bad, int64_t sample); + +DECLDIR uint64_t random_interval(brng_t *brng_state, uint64_t max); + +/* Generate random uint64 numbers in closed interval [off, off + rng]. */ +DECLDIR uint64_t random_bounded_uint64(brng_t *brng_state, uint64_t off, + uint64_t rng, uint64_t mask, + bool use_masked); + +/* Generate random uint32 numbers in closed interval [off, off + rng]. 
*/ +DECLDIR uint32_t random_buffered_bounded_uint32(brng_t *brng_state, + uint32_t off, uint32_t rng, + uint32_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint16_t random_buffered_bounded_uint16(brng_t *brng_state, + uint16_t off, uint16_t rng, + uint16_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint8_t random_buffered_bounded_uint8(brng_t *brng_state, uint8_t off, + uint8_t rng, uint8_t mask, + bool use_masked, int *bcnt, + uint32_t *buf); +DECLDIR npy_bool random_buffered_bounded_bool(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_bool mask, + bool use_masked, int *bcnt, + uint32_t *buf); + +DECLDIR void random_bounded_uint64_fill(brng_t *brng_state, uint64_t off, + uint64_t rng, npy_intp cnt, + bool use_masked, uint64_t *out); +DECLDIR void random_bounded_uint32_fill(brng_t *brng_state, uint32_t off, + uint32_t rng, npy_intp cnt, + bool use_masked, uint32_t *out); +DECLDIR void random_bounded_uint16_fill(brng_t *brng_state, uint16_t off, + uint16_t rng, npy_intp cnt, + bool use_masked, uint16_t *out); +DECLDIR void random_bounded_uint8_fill(brng_t *brng_state, uint8_t off, + uint8_t rng, npy_intp cnt, + bool use_masked, uint8_t *out); +DECLDIR void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_intp cnt, + bool use_masked, npy_bool *out); + +#endif diff --git a/numpy/random/src/distributions/ziggurat.h b/numpy/random/src/distributions/ziggurat.h new file mode 100644 index 000000000..7808c0e68 --- /dev/null +++ b/numpy/random/src/distributions/ziggurat.h @@ -0,0 +1,276 @@ +/* + * Constants from Julia's Ziggurat implementation + */ + +static const uint64_t ki[] = { + 0x0007799ec012f7b2, 0x0000000000000000, 0x0006045f4c7de363, + 0x0006d1aa7d5ec0a5, 0x000728fb3f60f777, 0x0007592af4e9fbc0, + 0x000777a5c0bf655d, 0x00078ca3857d2256, 0x00079bf6b0ffe58b, + 0x0007a7a34ab092ad, 0x0007b0d2f20dd1cb, 0x0007b83d3aa9cb52, + 0x0007be597614224d, 0x0007c3788631abe9, 0x0007c7d32bc192ee, + 
0x0007cb9263a6e86d, 0x0007ced483edfa84, 0x0007d1b07ac0fd39, + 0x0007d437ef2da5fc, 0x0007d678b069aa6e, 0x0007d87db38c5c87, + 0x0007da4fc6a9ba62, 0x0007dbf611b37f3b, 0x0007dd7674d0f286, + 0x0007ded5ce8205f6, 0x0007e018307fb62b, 0x0007e141081bd124, + 0x0007e2533d712de8, 0x0007e3514bbd7718, 0x0007e43d54944b52, + 0x0007e5192f25ef42, 0x0007e5e67481118d, 0x0007e6a6897c1ce2, + 0x0007e75aa6c7f64c, 0x0007e803df8ee498, 0x0007e8a326eb6272, + 0x0007e93954717a28, 0x0007e9c727f8648f, 0x0007ea4d4cc85a3c, + 0x0007eacc5c4907a9, 0x0007eb44e0474cf6, 0x0007ebb754e47419, + 0x0007ec242a3d8474, 0x0007ec8bc5d69645, 0x0007ecee83d3d6e9, + 0x0007ed4cb8082f45, 0x0007eda6aee0170f, 0x0007edfcae2dfe68, + 0x0007ee4ef5dccd3e, 0x0007ee9dc08c394e, 0x0007eee9441a17c7, + 0x0007ef31b21b4fb1, 0x0007ef773846a8a7, 0x0007efba00d35a17, + 0x0007effa32ccf69f, 0x0007f037f25e1278, 0x0007f0736112d12c, + 0x0007f0ac9e145c25, 0x0007f0e3c65e1fcc, 0x0007f118f4ed8e54, + 0x0007f14c42ed0dc8, 0x0007f17dc7daa0c3, 0x0007f1ad99aac6a5, + 0x0007f1dbcce80015, 0x0007f20874cf56bf, 0x0007f233a36a3b9a, + 0x0007f25d69a604ad, 0x0007f285d7694a92, 0x0007f2acfba75e3b, + 0x0007f2d2e4720909, 0x0007f2f79f09c344, 0x0007f31b37ec883b, + 0x0007f33dbae36abc, 0x0007f35f330f08d5, 0x0007f37faaf2fa79, + 0x0007f39f2c805380, 0x0007f3bdc11f4f1c, 0x0007f3db71b83850, + 0x0007f3f846bba121, 0x0007f4144829f846, 0x0007f42f7d9a8b9d, + 0x0007f449ee420432, 0x0007f463a0f8675e, 0x0007f47c9c3ea77b, + 0x0007f494e643cd8e, 0x0007f4ac84e9c475, 0x0007f4c37dc9cd50, + 0x0007f4d9d638a432, 0x0007f4ef934a5b6a, 0x0007f504b9d5f33d, + 0x0007f5194e78b352, 0x0007f52d55994a96, 0x0007f540d36aba0c, + 0x0007f553cbef0e77, 0x0007f56642f9ec8f, 0x0007f5783c32f31e, + 0x0007f589bb17f609, 0x0007f59ac2ff1525, 0x0007f5ab5718b15a, + 0x0007f5bb7a71427c, 0x0007f5cb2ff31009, 0x0007f5da7a67cebe, + 0x0007f5e95c7a24e7, 0x0007f5f7d8b7171e, 0x0007f605f18f5ef4, + 0x0007f613a958ad0a, 0x0007f621024ed7e9, 0x0007f62dfe94f8cb, + 0x0007f63aa036777a, 0x0007f646e928065a, 0x0007f652db488f88, + 
0x0007f65e786213ff, 0x0007f669c22a7d8a, 0x0007f674ba446459, + 0x0007f67f623fc8db, 0x0007f689bb9ac294, 0x0007f693c7c22481, + 0x0007f69d881217a6, 0x0007f6a6fdd6ac36, 0x0007f6b02a4c61ee, + 0x0007f6b90ea0a7f4, 0x0007f6c1abf254c0, 0x0007f6ca03521664, + 0x0007f6d215c2db82, 0x0007f6d9e43a3559, 0x0007f6e16fa0b329, + 0x0007f6e8b8d23729, 0x0007f6efc09e4569, 0x0007f6f687c84cbf, + 0x0007f6fd0f07ea09, 0x0007f703570925e2, 0x0007f709606cad03, + 0x0007f70f2bc8036f, 0x0007f714b9a5b292, 0x0007f71a0a85725d, + 0x0007f71f1edc4d9e, 0x0007f723f714c179, 0x0007f728938ed843, + 0x0007f72cf4a03fa0, 0x0007f7311a945a16, 0x0007f73505ac4bf8, + 0x0007f738b61f03bd, 0x0007f73c2c193dc0, 0x0007f73f67bd835c, + 0x0007f74269242559, 0x0007f745305b31a1, 0x0007f747bd666428, + 0x0007f74a103f12ed, 0x0007f74c28d414f5, 0x0007f74e0709a42d, + 0x0007f74faab939f9, 0x0007f75113b16657, 0x0007f75241b5a155, + 0x0007f753347e16b8, 0x0007f753ebb76b7c, 0x0007f75467027d05, + 0x0007f754a5f4199d, 0x0007f754a814b207, 0x0007f7546ce003ae, + 0x0007f753f3c4bb29, 0x0007f7533c240e92, 0x0007f75245514f41, + 0x0007f7510e91726c, 0x0007f74f971a9012, 0x0007f74dde135797, + 0x0007f74be2927971, 0x0007f749a39e051c, 0x0007f747202aba8a, + 0x0007f744571b4e3c, 0x0007f741473f9efe, 0x0007f73def53dc43, + 0x0007f73a4dff9bff, 0x0007f73661d4deaf, 0x0007f732294f003f, + 0x0007f72da2d19444, 0x0007f728cca72bda, 0x0007f723a5000367, + 0x0007f71e29f09627, 0x0007f7185970156b, 0x0007f7123156c102, + 0x0007f70baf5c1e2c, 0x0007f704d1150a23, 0x0007f6fd93f1a4e5, + 0x0007f6f5f53b10b6, 0x0007f6edf211023e, 0x0007f6e587671ce9, + 0x0007f6dcb2021679, 0x0007f6d36e749c64, 0x0007f6c9b91bf4c6, + 0x0007f6bf8e1c541b, 0x0007f6b4e95ce015, 0x0007f6a9c68356ff, + 0x0007f69e20ef5211, 0x0007f691f3b517eb, 0x0007f6853997f321, + 0x0007f677ed03ff19, 0x0007f66a08075bdc, 0x0007f65b844ab75a, + 0x0007f64c5b091860, 0x0007f63c8506d4bc, 0x0007f62bfa8798fe, + 0x0007f61ab34364b0, 0x0007f608a65a599a, 0x0007f5f5ca4737e8, + 0x0007f5e214d05b48, 0x0007f5cd7af7066e, 0x0007f5b7f0e4c2a1, + 
0x0007f5a169d68fcf, 0x0007f589d80596a5, 0x0007f5712c8d0174, + 0x0007f557574c912b, 0x0007f53c46c77193, 0x0007f51fe7feb9f2, + 0x0007f5022646ecfb, 0x0007f4e2eb17ab1d, 0x0007f4c21dd4a3d1, + 0x0007f49fa38ea394, 0x0007f47b5ebb62eb, 0x0007f4552ee27473, + 0x0007f42cf03d58f5, 0x0007f4027b48549f, 0x0007f3d5a44119df, + 0x0007f3a63a8fb552, 0x0007f37408155100, 0x0007f33ed05b55ec, + 0x0007f3064f9c183e, 0x0007f2ca399c7ba1, 0x0007f28a384bb940, + 0x0007f245ea1b7a2b, 0x0007f1fcdffe8f1b, 0x0007f1ae9af758cd, + 0x0007f15a8917f27e, 0x0007f10001ccaaab, 0x0007f09e413c418a, + 0x0007f034627733d7, 0x0007efc15815b8d5, 0x0007ef43e2bf7f55, + 0x0007eeba84e31dfe, 0x0007ee237294df89, 0x0007ed7c7c170141, + 0x0007ecc2f0d95d3a, 0x0007ebf377a46782, 0x0007eb09d6deb285, + 0x0007ea00a4f17808, 0x0007e8d0d3da63d6, 0x0007e771023b0fcf, + 0x0007e5d46c2f08d8, 0x0007e3e937669691, 0x0007e195978f1176, + 0x0007deb2c0e05c1c, 0x0007db0362002a19, 0x0007d6202c151439, + 0x0007cf4b8f00a2cb, 0x0007c4fd24520efd, 0x0007b362fbf81816, + 0x00078d2d25998e24}; + +static const double wi[] = { + 1.7367254121602630e-15, 9.5586603514556339e-17, 1.2708704834810623e-16, + 1.4909740962495474e-16, 1.6658733631586268e-16, 1.8136120810119029e-16, + 1.9429720153135588e-16, 2.0589500628482093e-16, 2.1646860576895422e-16, + 2.2622940392218116e-16, 2.3532718914045892e-16, 2.4387234557428771e-16, + 2.5194879829274225e-16, 2.5962199772528103e-16, 2.6694407473648285e-16, + 2.7395729685142446e-16, 2.8069646002484804e-16, 2.8719058904113930e-16, + 2.9346417484728883e-16, 2.9953809336782113e-16, 3.0543030007192440e-16, + 3.1115636338921572e-16, 3.1672988018581815e-16, 3.2216280350549905e-16, + 3.2746570407939751e-16, 3.3264798116841710e-16, 3.3771803417353232e-16, + 3.4268340353119356e-16, 3.4755088731729758e-16, 3.5232663846002031e-16, + 3.5701624633953494e-16, 3.6162480571598339e-16, 3.6615697529653540e-16, + 3.7061702777236077e-16, 3.7500889278747798e-16, 3.7933619401549554e-16, + 3.8360228129677279e-16, 3.8781025861250247e-16, 
3.9196300853257678e-16, + 3.9606321366256378e-16, 4.0011337552546690e-16, 4.0411583124143332e-16, + 4.0807276830960448e-16, 4.1198623774807442e-16, 4.1585816580828064e-16, + 4.1969036444740733e-16, 4.2348454071520708e-16, 4.2724230518899761e-16, + 4.3096517957162941e-16, 4.3465460355128760e-16, 4.3831194100854571e-16, + 4.4193848564470665e-16, 4.4553546609579137e-16, 4.4910405058828750e-16, + 4.5264535118571397e-16, 4.5616042766900381e-16, 4.5965029108849407e-16, + 4.6311590702081647e-16, 4.6655819856008752e-16, 4.6997804906941950e-16, + 4.7337630471583237e-16, 4.7675377680908526e-16, 4.8011124396270155e-16, + 4.8344945409350080e-16, 4.8676912627422087e-16, 4.9007095245229938e-16, + 4.9335559904654139e-16, 4.9662370843221783e-16, 4.9987590032409088e-16, + 5.0311277306593187e-16, 5.0633490483427195e-16, 5.0954285476338923e-16, + 5.1273716399787966e-16, 5.1591835667857364e-16, 5.1908694086703434e-16, + 5.2224340941340417e-16, 5.2538824077194543e-16, 5.2852189976823820e-16, + 5.3164483832166176e-16, 5.3475749612647295e-16, 5.3786030129452348e-16, + 5.4095367096239933e-16, 5.4403801186554671e-16, 5.4711372088173611e-16, + 5.5018118554603362e-16, 5.5324078453927836e-16, 5.5629288815190902e-16, + 5.5933785872484621e-16, 5.6237605106900435e-16, 5.6540781286489604e-16, + 5.6843348504368141e-16, 5.7145340215092040e-16, 5.7446789269419609e-16, + 5.7747727947569648e-16, 5.8048187991076857e-16, 5.8348200633338921e-16, + 5.8647796628943653e-16, 5.8947006281858718e-16, 5.9245859472561339e-16, + 5.9544385684180598e-16, 5.9842614027720281e-16, 6.0140573266426640e-16, + 6.0438291839361250e-16, 6.0735797884236057e-16, 6.1033119259564394e-16, + 6.1330283566179110e-16, 6.1627318168165963e-16, 6.1924250213258470e-16, + 6.2221106652737879e-16, 6.2517914260879998e-16, 6.2814699653988953e-16, + 6.3111489309056042e-16, 6.3408309582080600e-16, 6.3705186726088149e-16, + 6.4002146908880247e-16, 6.4299216230548961e-16, 6.4596420740788321e-16, + 6.4893786456033965e-16, 6.5191339376461587e-16, 
6.5489105502874154e-16, + 6.5787110853507413e-16, 6.6085381480782587e-16, 6.6383943488035057e-16, + 6.6682823046247459e-16, 6.6982046410815579e-16, 6.7281639938375311e-16, + 6.7581630103719006e-16, 6.7882043516829803e-16, 6.8182906940062540e-16, + 6.8484247305500383e-16, 6.8786091732516637e-16, 6.9088467545571690e-16, + 6.9391402292275690e-16, 6.9694923761748294e-16, 6.9999060003307640e-16, + 7.0303839345521508e-16, 7.0609290415654822e-16, 7.0915442159548734e-16, + 7.1222323861967788e-16, 7.1529965167453030e-16, 7.1838396101720629e-16, + 7.2147647093647067e-16, 7.2457748997883870e-16, 7.2768733118146927e-16, + 7.3080631231227429e-16, 7.3393475611774048e-16, 7.3707299057898310e-16, + 7.4022134917657997e-16, 7.4338017116476479e-16, 7.4654980185558890e-16, + 7.4973059291369793e-16, 7.5292290266240584e-16, 7.5612709640179217e-16, + 7.5934354673958895e-16, 7.6257263393567558e-16, 7.6581474626104873e-16, + 7.6907028037219191e-16, 7.7233964170182985e-16, 7.7562324486711744e-16, + 7.7892151409638524e-16, 7.8223488367564108e-16, 7.8556379841610841e-16, + 7.8890871414417552e-16, 7.9227009821522709e-16, 7.9564843005293662e-16, + 7.9904420171571300e-16, 8.0245791849212591e-16, 8.0589009952726568e-16, + 8.0934127848215009e-16, 8.1281200422845008e-16, 8.1630284158098775e-16, + 8.1981437207065329e-16, 8.2334719476060504e-16, 8.2690192710884700e-16, + 8.3047920588053737e-16, 8.3407968811366288e-16, 8.3770405214202216e-16, + 8.4135299867980282e-16, 8.4502725197240968e-16, 8.4872756101861549e-16, + 8.5245470086955962e-16, 8.5620947401062333e-16, 8.5999271183276646e-16, + 8.6380527620052589e-16, 8.6764806112455816e-16, 8.7152199454736980e-16, + 8.7542804025171749e-16, 8.7936719990210427e-16, 8.8334051523084080e-16, + 8.8734907038131345e-16, 8.9139399442240861e-16, 8.9547646404950677e-16, + 8.9959770648910994e-16, 9.0375900262601175e-16, 9.0796169037400680e-16, + 9.1220716831348461e-16, 9.1649689962191353e-16, 9.2083241632623076e-16, + 9.2521532390956933e-16, 9.2964730630864167e-16, 
9.3413013134252651e-16, + 9.3866565661866598e-16, 9.4325583596767065e-16, 9.4790272646517382e-16, + 9.5260849610662787e-16, 9.5737543220974496e-16, 9.6220595062948384e-16, + 9.6710260588230542e-16, 9.7206810229016259e-16, 9.7710530627072088e-16, + 9.8221725991905411e-16, 9.8740719604806711e-16, 9.9267855488079765e-16, + 9.9803500261836449e-16, 1.0034804521436181e-15, 1.0090190861637457e-15, + 1.0146553831467086e-15, 1.0203941464683124e-15, 1.0262405372613567e-15, + 1.0322001115486456e-15, 1.0382788623515399e-15, 1.0444832676000471e-15, + 1.0508203448355195e-15, 1.0572977139009890e-15, 1.0639236690676801e-15, + 1.0707072623632994e-15, 1.0776584002668106e-15, 1.0847879564403425e-15, + 1.0921079038149563e-15, 1.0996314701785628e-15, 1.1073733224935752e-15, + 1.1153497865853155e-15, 1.1235791107110833e-15, 1.1320817840164846e-15, + 1.1408809242582780e-15, 1.1500027537839792e-15, 1.1594771891449189e-15, + 1.1693385786910960e-15, 1.1796266352955801e-15, 1.1903876299282890e-15, + 1.2016759392543819e-15, 1.2135560818666897e-15, 1.2261054417450561e-15, + 1.2394179789163251e-15, 1.2536093926602567e-15, 1.2688244814255010e-15, + 1.2852479319096109e-15, 1.3031206634689985e-15, 1.3227655770195326e-15, + 1.3446300925011171e-15, 1.3693606835128518e-15, 1.3979436672775240e-15, + 1.4319989869661328e-15, 1.4744848603597596e-15, 1.5317872741611144e-15, + 1.6227698675312968e-15}; + +static const double fi[] = { + 1.0000000000000000e+00, 9.7710170126767082e-01, 9.5987909180010600e-01, + 9.4519895344229909e-01, 9.3206007595922991e-01, 9.1999150503934646e-01, + 9.0872644005213032e-01, 8.9809592189834297e-01, 8.8798466075583282e-01, + 8.7830965580891684e-01, 8.6900868803685649e-01, 8.6003362119633109e-01, + 8.5134625845867751e-01, 8.4291565311220373e-01, 8.3471629298688299e-01, + 8.2672683394622093e-01, 8.1892919160370192e-01, 8.1130787431265572e-01, + 8.0384948317096383e-01, 7.9654233042295841e-01, 7.8937614356602404e-01, + 7.8234183265480195e-01, 7.7543130498118662e-01, 
7.6863731579848571e-01, + 7.6195334683679483e-01, 7.5537350650709567e-01, 7.4889244721915638e-01, + 7.4250529634015061e-01, 7.3620759812686210e-01, 7.2999526456147568e-01, + 7.2386453346862967e-01, 7.1781193263072152e-01, 7.1183424887824798e-01, + 7.0592850133275376e-01, 7.0009191813651117e-01, 6.9432191612611627e-01, + 6.8861608300467136e-01, 6.8297216164499430e-01, 6.7738803621877308e-01, + 6.7186171989708166e-01, 6.6639134390874977e-01, 6.6097514777666277e-01, + 6.5561147057969693e-01, 6.5029874311081637e-01, 6.4503548082082196e-01, + 6.3982027745305614e-01, 6.3465179928762327e-01, 6.2952877992483625e-01, + 6.2445001554702606e-01, 6.1941436060583399e-01, 6.1442072388891344e-01, + 6.0946806492577310e-01, 6.0455539069746733e-01, 5.9968175261912482e-01, + 5.9484624376798689e-01, 5.9004799633282545e-01, 5.8528617926337090e-01, + 5.8055999610079034e-01, 5.7586868297235316e-01, 5.7121150673525267e-01, + 5.6658776325616389e-01, 5.6199677581452390e-01, 5.5743789361876550e-01, + 5.5291049042583185e-01, 5.4841396325526537e-01, 5.4394773119002582e-01, + 5.3951123425695158e-01, 5.3510393238045717e-01, 5.3072530440366150e-01, + 5.2637484717168403e-01, 5.2205207467232140e-01, 5.1775651722975591e-01, + 5.1348772074732651e-01, 5.0924524599574761e-01, 5.0502866794346790e-01, + 5.0083757512614835e-01, 4.9667156905248933e-01, 4.9253026364386815e-01, + 4.8841328470545758e-01, 4.8432026942668288e-01, 4.8025086590904642e-01, + 4.7620473271950547e-01, 4.7218153846772976e-01, 4.6818096140569321e-01, + 4.6420268904817391e-01, 4.6024641781284248e-01, 4.5631185267871610e-01, + 4.5239870686184824e-01, 4.4850670150720273e-01, 4.4463556539573912e-01, + 4.4078503466580377e-01, 4.3695485254798533e-01, 4.3314476911265209e-01, + 4.2935454102944126e-01, 4.2558393133802180e-01, 4.2183270922949573e-01, + 4.1810064983784795e-01, 4.1438753404089090e-01, 4.1069314827018799e-01, + 4.0701728432947315e-01, 4.0335973922111429e-01, 3.9972031498019700e-01, + 3.9609881851583223e-01, 3.9249506145931540e-01, 
3.8890886001878855e-01, + 3.8534003484007706e-01, 3.8178841087339344e-01, 3.7825381724561896e-01, + 3.7473608713789086e-01, 3.7123505766823922e-01, 3.6775056977903225e-01, + 3.6428246812900372e-01, 3.6083060098964775e-01, 3.5739482014578022e-01, + 3.5397498080007656e-01, 3.5057094148140588e-01, 3.4718256395679348e-01, + 3.4380971314685055e-01, 3.4045225704452164e-01, 3.3711006663700588e-01, + 3.3378301583071823e-01, 3.3047098137916342e-01, 3.2717384281360129e-01, + 3.2389148237639104e-01, 3.2062378495690530e-01, 3.1737063802991350e-01, + 3.1413193159633707e-01, 3.1090755812628634e-01, 3.0769741250429189e-01, + 3.0450139197664983e-01, 3.0131939610080288e-01, 2.9815132669668531e-01, + 2.9499708779996164e-01, 2.9185658561709499e-01, 2.8872972848218270e-01, + 2.8561642681550159e-01, 2.8251659308370741e-01, 2.7943014176163772e-01, + 2.7635698929566810e-01, 2.7329705406857691e-01, 2.7025025636587519e-01, + 2.6721651834356114e-01, 2.6419576399726080e-01, 2.6118791913272082e-01, + 2.5819291133761890e-01, 2.5521066995466168e-01, 2.5224112605594190e-01, + 2.4928421241852824e-01, 2.4633986350126363e-01, 2.4340801542275012e-01, + 2.4048860594050039e-01, 2.3758157443123795e-01, 2.3468686187232990e-01, + 2.3180441082433859e-01, 2.2893416541468023e-01, 2.2607607132238020e-01, + 2.2323007576391746e-01, 2.2039612748015194e-01, 2.1757417672433113e-01, + 2.1476417525117358e-01, 2.1196607630703015e-01, 2.0917983462112499e-01, + 2.0640540639788071e-01, 2.0364274931033485e-01, 2.0089182249465656e-01, + 1.9815258654577511e-01, 1.9542500351413428e-01, 1.9270903690358912e-01, + 1.9000465167046496e-01, 1.8731181422380025e-01, 1.8463049242679927e-01, + 1.8196065559952254e-01, 1.7930227452284767e-01, 1.7665532144373500e-01, + 1.7401977008183875e-01, 1.7139559563750595e-01, 1.6878277480121151e-01, + 1.6618128576448205e-01, 1.6359110823236570e-01, 1.6101222343751107e-01, + 1.5844461415592431e-01, 1.5588826472447920e-01, 1.5334316106026283e-01, + 1.5080929068184568e-01, 1.4828664273257453e-01, 
1.4577520800599403e-01, + 1.4327497897351341e-01, 1.4078594981444470e-01, 1.3830811644855071e-01, + 1.3584147657125373e-01, 1.3338602969166913e-01, 1.3094177717364430e-01, + 1.2850872227999952e-01, 1.2608687022018586e-01, 1.2367622820159654e-01, + 1.2127680548479021e-01, 1.1888861344290998e-01, 1.1651166562561080e-01, + 1.1414597782783835e-01, 1.1179156816383801e-01, 1.0944845714681163e-01, + 1.0711666777468364e-01, 1.0479622562248690e-01, 1.0248715894193508e-01, + 1.0018949876880981e-01, 9.7903279038862284e-02, 9.5628536713008819e-02, + 9.3365311912690860e-02, 9.1113648066373634e-02, 8.8873592068275789e-02, + 8.6645194450557961e-02, 8.4428509570353374e-02, 8.2223595813202863e-02, + 8.0030515814663056e-02, 7.7849336702096039e-02, 7.5680130358927067e-02, + 7.3522973713981268e-02, 7.1377949058890375e-02, 6.9245144397006769e-02, + 6.7124653827788497e-02, 6.5016577971242842e-02, 6.2921024437758113e-02, + 6.0838108349539864e-02, 5.8767952920933758e-02, 5.6710690106202902e-02, + 5.4666461324888914e-02, 5.2635418276792176e-02, 5.0617723860947761e-02, + 4.8613553215868521e-02, 4.6623094901930368e-02, 4.4646552251294443e-02, + 4.2684144916474431e-02, 4.0736110655940933e-02, 3.8802707404526113e-02, + 3.6884215688567284e-02, 3.4980941461716084e-02, 3.3093219458578522e-02, + 3.1221417191920245e-02, 2.9365939758133314e-02, 2.7527235669603082e-02, + 2.5705804008548896e-02, 2.3902203305795882e-02, 2.2117062707308864e-02, + 2.0351096230044517e-02, 1.8605121275724643e-02, 1.6880083152543166e-02, + 1.5177088307935325e-02, 1.3497450601739880e-02, 1.1842757857907888e-02, + 1.0214971439701471e-02, 8.6165827693987316e-03, 7.0508754713732268e-03, + 5.5224032992509968e-03, 4.0379725933630305e-03, 2.6090727461021627e-03, + 1.2602859304985975e-03}; + +static const double ziggurat_nor_r = 3.6541528853610087963519472518; +static const double ziggurat_nor_inv_r = + 0.27366123732975827203338247596; // 1.0 / ziggurat_nor_r; +static const double ziggurat_exp_r = 7.6971174701310497140446280481; + 
+static const float ziggurat_nor_r_f = 3.6541528853610087963519472518f; +static const float ziggurat_nor_inv_r_f = 0.27366123732975827203338247596f; +static const float ziggurat_exp_r_f = 7.6971174701310497140446280481f; diff --git a/numpy/random/src/distributions/ziggurat_constants.h b/numpy/random/src/distributions/ziggurat_constants.h new file mode 100644 index 000000000..17eccec0f --- /dev/null +++ b/numpy/random/src/distributions/ziggurat_constants.h @@ -0,0 +1,1196 @@ +static const uint64_t ki_double[] = { + 0x000EF33D8025EF6AULL, 0x0000000000000000ULL, 0x000C08BE98FBC6A8ULL, + 0x000DA354FABD8142ULL, 0x000E51F67EC1EEEAULL, 0x000EB255E9D3F77EULL, + 0x000EEF4B817ECAB9ULL, 0x000F19470AFA44AAULL, 0x000F37ED61FFCB18ULL, + 0x000F4F469561255CULL, 0x000F61A5E41BA396ULL, 0x000F707A755396A4ULL, + 0x000F7CB2EC28449AULL, 0x000F86F10C6357D3ULL, 0x000F8FA6578325DEULL, + 0x000F9724C74DD0DAULL, 0x000F9DA907DBF509ULL, 0x000FA360F581FA74ULL, + 0x000FA86FDE5B4BF8ULL, 0x000FACF160D354DCULL, 0x000FB0FB6718B90FULL, + 0x000FB49F8D5374C6ULL, 0x000FB7EC2366FE77ULL, 0x000FBAECE9A1E50EULL, + 0x000FBDAB9D040BEDULL, 0x000FC03060FF6C57ULL, 0x000FC2821037A248ULL, + 0x000FC4A67AE25BD1ULL, 0x000FC6A2977AEE31ULL, 0x000FC87AA92896A4ULL, + 0x000FCA325E4BDE85ULL, 0x000FCBCCE902231AULL, 0x000FCD4D12F839C4ULL, + 0x000FCEB54D8FEC99ULL, 0x000FD007BF1DC930ULL, 0x000FD1464DD6C4E6ULL, + 0x000FD272A8E2F450ULL, 0x000FD38E4FF0C91EULL, 0x000FD49A9990B478ULL, + 0x000FD598B8920F53ULL, 0x000FD689C08E99ECULL, 0x000FD76EA9C8E832ULL, + 0x000FD848547B08E8ULL, 0x000FD9178BAD2C8CULL, 0x000FD9DD07A7ADD2ULL, + 0x000FDA9970105E8CULL, 0x000FDB4D5DC02E20ULL, 0x000FDBF95C5BFCD0ULL, + 0x000FDC9DEBB99A7DULL, 0x000FDD3B8118729DULL, 0x000FDDD288342F90ULL, + 0x000FDE6364369F64ULL, 0x000FDEEE708D514EULL, 0x000FDF7401A6B42EULL, + 0x000FDFF46599ED40ULL, 0x000FE06FE4BC24F2ULL, 0x000FE0E6C225A258ULL, + 0x000FE1593C28B84CULL, 0x000FE1C78CBC3F99ULL, 0x000FE231E9DB1CAAULL, + 0x000FE29885DA1B91ULL, 0x000FE2FB8FB54186ULL, 
0x000FE35B33558D4AULL, + 0x000FE3B799D0002AULL, 0x000FE410E99EAD7FULL, 0x000FE46746D47734ULL, + 0x000FE4BAD34C095CULL, 0x000FE50BAED29524ULL, 0x000FE559F74EBC78ULL, + 0x000FE5A5C8E41212ULL, 0x000FE5EF3E138689ULL, 0x000FE6366FD91078ULL, + 0x000FE67B75C6D578ULL, 0x000FE6BE661E11AAULL, 0x000FE6FF55E5F4F2ULL, + 0x000FE73E5900A702ULL, 0x000FE77B823E9E39ULL, 0x000FE7B6E37070A2ULL, + 0x000FE7F08D774243ULL, 0x000FE8289053F08CULL, 0x000FE85EFB35173AULL, + 0x000FE893DC840864ULL, 0x000FE8C741F0CEBCULL, 0x000FE8F9387D4EF6ULL, + 0x000FE929CC879B1DULL, 0x000FE95909D388EAULL, 0x000FE986FB939AA2ULL, + 0x000FE9B3AC714866ULL, 0x000FE9DF2694B6D5ULL, 0x000FEA0973ABE67CULL, + 0x000FEA329CF166A4ULL, 0x000FEA5AAB32952CULL, 0x000FEA81A6D5741AULL, + 0x000FEAA797DE1CF0ULL, 0x000FEACC85F3D920ULL, 0x000FEAF07865E63CULL, + 0x000FEB13762FEC13ULL, 0x000FEB3585FE2A4AULL, 0x000FEB56AE3162B4ULL, + 0x000FEB76F4E284FAULL, 0x000FEB965FE62014ULL, 0x000FEBB4F4CF9D7CULL, + 0x000FEBD2B8F449D0ULL, 0x000FEBEFB16E2E3EULL, 0x000FEC0BE31EBDE8ULL, + 0x000FEC2752B15A15ULL, 0x000FEC42049DAFD3ULL, 0x000FEC5BFD29F196ULL, + 0x000FEC75406CEEF4ULL, 0x000FEC8DD2500CB4ULL, 0x000FECA5B6911F12ULL, + 0x000FECBCF0C427FEULL, 0x000FECD38454FB15ULL, 0x000FECE97488C8B3ULL, + 0x000FECFEC47F91B7ULL, 0x000FED1377358528ULL, 0x000FED278F844903ULL, + 0x000FED3B10242F4CULL, 0x000FED4DFBAD586EULL, 0x000FED605498C3DDULL, + 0x000FED721D414FE8ULL, 0x000FED8357E4A982ULL, 0x000FED9406A42CC8ULL, + 0x000FEDA42B85B704ULL, 0x000FEDB3C8746AB4ULL, 0x000FEDC2DF416652ULL, + 0x000FEDD171A46E52ULL, 0x000FEDDF813C8AD3ULL, 0x000FEDED0F909980ULL, + 0x000FEDFA1E0FD414ULL, 0x000FEE06AE124BC4ULL, 0x000FEE12C0D95A06ULL, + 0x000FEE1E579006E0ULL, 0x000FEE29734B6524ULL, 0x000FEE34150AE4BCULL, + 0x000FEE3E3DB89B3CULL, 0x000FEE47EE2982F4ULL, 0x000FEE51271DB086ULL, + 0x000FEE59E9407F41ULL, 0x000FEE623528B42EULL, 0x000FEE6A0B5897F1ULL, + 0x000FEE716C3E077AULL, 0x000FEE7858327B82ULL, 0x000FEE7ECF7B06BAULL, + 0x000FEE84D2484AB2ULL, 0x000FEE8A60B66343ULL, 
0x000FEE8F7ACCC851ULL, + 0x000FEE94207E25DAULL, 0x000FEE9851A829EAULL, 0x000FEE9C0E13485CULL, + 0x000FEE9F557273F4ULL, 0x000FEEA22762CCAEULL, 0x000FEEA4836B42ACULL, + 0x000FEEA668FC2D71ULL, 0x000FEEA7D76ED6FAULL, 0x000FEEA8CE04FA0AULL, + 0x000FEEA94BE8333BULL, 0x000FEEA950296410ULL, 0x000FEEA8D9C0075EULL, + 0x000FEEA7E7897654ULL, 0x000FEEA678481D24ULL, 0x000FEEA48AA29E83ULL, + 0x000FEEA21D22E4DAULL, 0x000FEE9F2E352024ULL, 0x000FEE9BBC26AF2EULL, + 0x000FEE97C524F2E4ULL, 0x000FEE93473C0A3AULL, 0x000FEE8E40557516ULL, + 0x000FEE88AE369C7AULL, 0x000FEE828E7F3DFDULL, 0x000FEE7BDEA7B888ULL, + 0x000FEE749BFF37FFULL, 0x000FEE6CC3A9BD5EULL, 0x000FEE64529E007EULL, + 0x000FEE5B45A32888ULL, 0x000FEE51994E57B6ULL, 0x000FEE474A0006CFULL, + 0x000FEE3C53E12C50ULL, 0x000FEE30B2E02AD8ULL, 0x000FEE2462AD8205ULL, + 0x000FEE175EB83C5AULL, 0x000FEE09A22A1447ULL, 0x000FEDFB27E349CCULL, + 0x000FEDEBEA76216CULL, 0x000FEDDBE422047EULL, 0x000FEDCB0ECE39D3ULL, + 0x000FEDB964042CF4ULL, 0x000FEDA6DCE938C9ULL, 0x000FED937237E98DULL, + 0x000FED7F1C38A836ULL, 0x000FED69D2B9C02BULL, 0x000FED538D06AE00ULL, + 0x000FED3C41DEA422ULL, 0x000FED23E76A2FD8ULL, 0x000FED0A732FE644ULL, + 0x000FECEFDA07FE34ULL, 0x000FECD4100EB7B8ULL, 0x000FECB708956EB4ULL, + 0x000FEC98B61230C1ULL, 0x000FEC790A0DA978ULL, 0x000FEC57F50F31FEULL, + 0x000FEC356686C962ULL, 0x000FEC114CB4B335ULL, 0x000FEBEB948E6FD0ULL, + 0x000FEBC429A0B692ULL, 0x000FEB9AF5EE0CDCULL, 0x000FEB6FE1C98542ULL, + 0x000FEB42D3AD1F9EULL, 0x000FEB13B00B2D4BULL, 0x000FEAE2591A02E9ULL, + 0x000FEAAEAE992257ULL, 0x000FEA788D8EE326ULL, 0x000FEA3FCFFD73E5ULL, + 0x000FEA044C8DD9F6ULL, 0x000FE9C5D62F563BULL, 0x000FE9843BA947A4ULL, + 0x000FE93F471D4728ULL, 0x000FE8F6BD76C5D6ULL, 0x000FE8AA5DC4E8E6ULL, + 0x000FE859E07AB1EAULL, 0x000FE804F690A940ULL, 0x000FE7AB488233C0ULL, + 0x000FE74C751F6AA5ULL, 0x000FE6E8102AA202ULL, 0x000FE67DA0B6ABD8ULL, + 0x000FE60C9F38307EULL, 0x000FE5947338F742ULL, 0x000FE51470977280ULL, + 0x000FE48BD436F458ULL, 0x000FE3F9BFFD1E37ULL, 
0x000FE35D35EEB19CULL, + 0x000FE2B5122FE4FEULL, 0x000FE20003995557ULL, 0x000FE13C82788314ULL, + 0x000FE068C4EE67B0ULL, 0x000FDF82B02B71AAULL, 0x000FDE87C57EFEAAULL, + 0x000FDD7509C63BFDULL, 0x000FDC46E529BF13ULL, 0x000FDAF8F82E0282ULL, + 0x000FD985E1B2BA75ULL, 0x000FD7E6EF48CF04ULL, 0x000FD613ADBD650BULL, + 0x000FD40149E2F012ULL, 0x000FD1A1A7B4C7ACULL, 0x000FCEE204761F9EULL, + 0x000FCBA8D85E11B2ULL, 0x000FC7D26ECD2D22ULL, 0x000FC32B2F1E22EDULL, + 0x000FBD6581C0B83AULL, 0x000FB606C4005434ULL, 0x000FAC40582A2874ULL, + 0x000F9E971E014598ULL, 0x000F89FA48A41DFCULL, 0x000F66C5F7F0302CULL, + 0x000F1A5A4B331C4AULL}; + +static const double wi_double[] = { + 8.68362706080130616677e-16, 4.77933017572773682428e-17, + 6.35435241740526230246e-17, 7.45487048124769627714e-17, + 8.32936681579309972857e-17, 9.06806040505948228243e-17, + 9.71486007656776183958e-17, 1.02947503142410192108e-16, + 1.08234302884476839838e-16, 1.13114701961090307945e-16, + 1.17663594570229211411e-16, 1.21936172787143633280e-16, + 1.25974399146370927864e-16, 1.29810998862640315416e-16, + 1.33472037368241227547e-16, 1.36978648425712032797e-16, + 1.40348230012423820659e-16, 1.43595294520569430270e-16, + 1.46732087423644219083e-16, 1.49769046683910367425e-16, + 1.52715150035961979750e-16, 1.55578181694607639484e-16, + 1.58364940092908853989e-16, 1.61081401752749279325e-16, + 1.63732852039698532012e-16, 1.66323990584208352778e-16, + 1.68859017086765964015e-16, 1.71341701765596607184e-16, + 1.73775443658648593310e-16, 1.76163319230009959832e-16, + 1.78508123169767272927e-16, 1.80812402857991522674e-16, + 1.83078487648267501776e-16, 1.85308513886180189386e-16, + 1.87504446393738816849e-16, 1.89668097007747596212e-16, + 1.91801140648386198029e-16, 1.93905129306251037069e-16, + 1.95981504266288244037e-16, 1.98031606831281739736e-16, + 2.00056687762733300198e-16, 2.02057915620716538808e-16, + 2.04036384154802118313e-16, 2.05993118874037063144e-16, + 2.07929082904140197311e-16, 2.09845182223703516690e-16, + 
2.11742270357603418769e-16, 2.13621152594498681022e-16, + 2.15482589785814580926e-16, 2.17327301775643674990e-16, + 2.19155970504272708519e-16, 2.20969242822353175995e-16, + 2.22767733047895534948e-16, 2.24552025294143552381e-16, + 2.26322675592856786566e-16, 2.28080213834501706782e-16, + 2.29825145544246839061e-16, 2.31557953510408037008e-16, + 2.33279099280043561128e-16, 2.34989024534709550938e-16, + 2.36688152357916037468e-16, 2.38376888404542434981e-16, + 2.40055621981350627349e-16, 2.41724727046750252175e-16, + 2.43384563137110286400e-16, 2.45035476226149539878e-16, + 2.46677799523270498158e-16, 2.48311854216108767769e-16, + 2.49937950162045242375e-16, 2.51556386532965786439e-16, + 2.53167452417135826983e-16, 2.54771427381694417303e-16, + 2.56368581998939683749e-16, 2.57959178339286723500e-16, + 2.59543470433517070146e-16, 2.61121704706701939097e-16, + 2.62694120385972564623e-16, 2.64260949884118951286e-16, + 2.65822419160830680292e-16, 2.67378748063236329361e-16, + 2.68930150647261591777e-16, 2.70476835481199518794e-16, + 2.72019005932773206655e-16, 2.73556860440867908686e-16, + 2.75090592773016664571e-16, 2.76620392269639032183e-16, + 2.78146444075954410103e-16, 2.79668929362423005309e-16, + 2.81188025534502074329e-16, 2.82703906432447923059e-16, + 2.84216742521840606520e-16, 2.85726701075460149289e-16, + 2.87233946347097994381e-16, 2.88738639737848191815e-16, + 2.90240939955384233230e-16, 2.91741003166694553259e-16, + 2.93238983144718163965e-16, 2.94735031409293489611e-16, + 2.96229297362806647792e-16, 2.97721928420902891115e-16, + 2.99213070138601307081e-16, 3.00702866332133102993e-16, + 3.02191459196806151971e-16, 3.03678989421180184427e-16, + 3.05165596297821922381e-16, 3.06651417830895451744e-16, + 3.08136590840829717032e-16, 3.09621251066292253306e-16, + 3.11105533263689296831e-16, 3.12589571304399892784e-16, + 3.14073498269944617203e-16, 3.15557446545280064031e-16, + 3.17041547910402852545e-16, 3.18525933630440648871e-16, + 3.20010734544401137886e-16, 
3.21496081152744704901e-16, + 3.22982103703941557538e-16, 3.24468932280169778077e-16, + 3.25956696882307838340e-16, 3.27445527514370671802e-16, + 3.28935554267536967851e-16, 3.30426907403912838589e-16, + 3.31919717440175233652e-16, 3.33414115231237245918e-16, + 3.34910232054077845412e-16, 3.36408199691876507948e-16, + 3.37908150518594979994e-16, 3.39410217584148914282e-16, + 3.40914534700312603713e-16, 3.42421236527501816058e-16, + 3.43930458662583133920e-16, 3.45442337727858401604e-16, + 3.46957011461378353333e-16, 3.48474618808741370700e-16, + 3.49995300016538099813e-16, 3.51519196727607440975e-16, + 3.53046452078274009054e-16, 3.54577210797743572160e-16, + 3.56111619309838843415e-16, 3.57649825837265051035e-16, + 3.59191980508602994994e-16, 3.60738235468235137839e-16, + 3.62288744989419151904e-16, 3.63843665590734438546e-16, + 3.65403156156136995766e-16, 3.66967378058870090021e-16, + 3.68536495289491401456e-16, 3.70110674588289834952e-16, + 3.71690085582382297792e-16, 3.73274900927794352614e-16, + 3.74865296456848868882e-16, 3.76461451331202869131e-16, + 3.78063548200896037651e-16, 3.79671773369794425924e-16, + 3.81286316967837738238e-16, 3.82907373130524317507e-16, + 3.84535140186095955858e-16, 3.86169820850914927119e-16, + 3.87811622433558721164e-16, 3.89460757048192620674e-16, + 3.91117441837820542060e-16, 3.92781899208054153270e-16, + 3.94454357072087711446e-16, 3.96135049107613542983e-16, + 3.97824215026468259474e-16, 3.99522100857856502444e-16, + 4.01228959246062907451e-16, 4.02945049763632792393e-16, + 4.04670639241074995115e-16, 4.06406002114225038723e-16, + 4.08151420790493873480e-16, 4.09907186035326643447e-16, + 4.11673597380302570170e-16, 4.13450963554423599878e-16, + 4.15239602940268833891e-16, 4.17039844056831587498e-16, + 4.18852026071011229572e-16, 4.20676499339901510978e-16, + 4.22513625986204937320e-16, 4.24363780509307796137e-16, + 4.26227350434779809917e-16, 4.28104737005311666397e-16, + 4.29996355916383230161e-16, 4.31902638100262944617e-16, 
+ 4.33824030562279080411e-16, 4.35760997273684900553e-16, + 4.37714020125858747008e-16, 4.39683599951052137423e-16, + 4.41670257615420348435e-16, 4.43674535190656726604e-16, + 4.45696997211204306674e-16, 4.47738232024753387312e-16, + 4.49798853244554968009e-16, 4.51879501313005876278e-16, + 4.53980845187003400947e-16, 4.56103584156742206384e-16, + 4.58248449810956667052e-16, 4.60416208163115281428e-16, + 4.62607661954784567754e-16, 4.64823653154320737780e-16, + 4.67065065671263059081e-16, 4.69332828309332890697e-16, + 4.71627917983835129766e-16, 4.73951363232586715165e-16, + 4.76304248053313737663e-16, 4.78687716104872284247e-16, + 4.81102975314741720538e-16, 4.83551302941152515162e-16, + 4.86034051145081195402e-16, 4.88552653135360343280e-16, + 4.91108629959526955862e-16, 4.93703598024033454728e-16, + 4.96339277440398725619e-16, 4.99017501309182245754e-16, + 5.01740226071808946011e-16, 5.04509543081872748637e-16, + 5.07327691573354207058e-16, 5.10197073234156184149e-16, + 5.13120268630678373200e-16, 5.16100055774322824569e-16, + 5.19139431175769859873e-16, 5.22241633800023428760e-16, + 5.25410172417759732697e-16, 5.28648856950494511482e-16, + 5.31961834533840037535e-16, 5.35353631181649688145e-16, + 5.38829200133405320160e-16, 5.42393978220171234073e-16, + 5.46053951907478041166e-16, 5.49815735089281410703e-16, + 5.53686661246787600374e-16, 5.57674893292657647836e-16, + 5.61789555355541665830e-16, 5.66040892008242216739e-16, + 5.70440462129138908417e-16, 5.75001376891989523684e-16, + 5.79738594572459365014e-16, 5.84669289345547900201e-16, + 5.89813317647789942685e-16, 5.95193814964144415532e-16, + 6.00837969627190832234e-16, 6.06778040933344851394e-16, + 6.13052720872528159123e-16, 6.19708989458162555387e-16, + 6.26804696330128439415e-16, 6.34412240712750598627e-16, + 6.42623965954805540945e-16, 6.51560331734499356881e-16, + 6.61382788509766415145e-16, 6.72315046250558662913e-16, + 6.84680341756425875856e-16, 6.98971833638761995415e-16, + 
7.15999493483066421560e-16, 7.37242430179879890722e-16, + 7.65893637080557275482e-16, 8.11384933765648418565e-16}; + +static const double fi_double[] = { + 1.00000000000000000000e+00, 9.77101701267671596263e-01, + 9.59879091800106665211e-01, 9.45198953442299649730e-01, + 9.32060075959230460718e-01, 9.19991505039347012840e-01, + 9.08726440052130879366e-01, 8.98095921898343418910e-01, + 8.87984660755833377088e-01, 8.78309655808917399966e-01, + 8.69008688036857046555e-01, 8.60033621196331532488e-01, + 8.51346258458677951353e-01, 8.42915653112204177333e-01, + 8.34716292986883434679e-01, 8.26726833946221373317e-01, + 8.18929191603702366642e-01, 8.11307874312656274185e-01, + 8.03849483170964274059e-01, 7.96542330422958966274e-01, + 7.89376143566024590648e-01, 7.82341832654802504798e-01, + 7.75431304981187174974e-01, 7.68637315798486264740e-01, + 7.61953346836795386565e-01, 7.55373506507096115214e-01, + 7.48892447219156820459e-01, 7.42505296340151055290e-01, + 7.36207598126862650112e-01, 7.29995264561476231435e-01, + 7.23864533468630222401e-01, 7.17811932630721960535e-01, + 7.11834248878248421200e-01, 7.05928501332754310127e-01, + 7.00091918136511615067e-01, 6.94321916126116711609e-01, + 6.88616083004671808432e-01, 6.82972161644994857355e-01, + 6.77388036218773526009e-01, 6.71861719897082099173e-01, + 6.66391343908750100056e-01, 6.60975147776663107813e-01, + 6.55611470579697264149e-01, 6.50298743110816701574e-01, + 6.45035480820822293424e-01, 6.39820277453056585060e-01, + 6.34651799287623608059e-01, 6.29528779924836690007e-01, + 6.24450015547026504592e-01, 6.19414360605834324325e-01, + 6.14420723888913888899e-01, 6.09468064925773433949e-01, + 6.04555390697467776029e-01, 5.99681752619125263415e-01, + 5.94846243767987448159e-01, 5.90047996332826008015e-01, + 5.85286179263371453274e-01, 5.80559996100790898232e-01, + 5.75868682972353718164e-01, 5.71211506735253227163e-01, + 5.66587763256164445025e-01, 5.61996775814524340831e-01, + 5.57437893618765945014e-01, 
5.52910490425832290562e-01, + 5.48413963255265812791e-01, 5.43947731190026262382e-01, + 5.39511234256952132426e-01, 5.35103932380457614215e-01, + 5.30725304403662057062e-01, 5.26374847171684479008e-01, + 5.22052074672321841931e-01, 5.17756517229756352272e-01, + 5.13487720747326958914e-01, 5.09245245995747941592e-01, + 5.05028667943468123624e-01, 5.00837575126148681903e-01, + 4.96671569052489714213e-01, 4.92530263643868537748e-01, + 4.88413284705458028423e-01, 4.84320269426683325253e-01, + 4.80250865909046753544e-01, 4.76204732719505863248e-01, + 4.72181538467730199660e-01, 4.68180961405693596422e-01, + 4.64202689048174355069e-01, 4.60246417812842867345e-01, + 4.56311852678716434184e-01, 4.52398706861848520777e-01, + 4.48506701507203064949e-01, 4.44635565395739396077e-01, + 4.40785034665803987508e-01, 4.36954852547985550526e-01, + 4.33144769112652261445e-01, 4.29354541029441427735e-01, + 4.25583931338021970170e-01, 4.21832709229495894654e-01, + 4.18100649837848226120e-01, 4.14387534040891125642e-01, + 4.10693148270188157500e-01, 4.07017284329473372217e-01, + 4.03359739221114510510e-01, 3.99720314980197222177e-01, + 3.96098818515832451492e-01, 3.92495061459315619512e-01, + 3.88908860018788715696e-01, 3.85340034840077283462e-01, + 3.81788410873393657674e-01, 3.78253817245619183840e-01, + 3.74736087137891138443e-01, 3.71235057668239498696e-01, + 3.67750569779032587814e-01, 3.64282468129004055601e-01, + 3.60830600989648031529e-01, 3.57394820145780500731e-01, + 3.53974980800076777232e-01, 3.50570941481406106455e-01, + 3.47182563956793643900e-01, 3.43809713146850715049e-01, + 3.40452257044521866547e-01, 3.37110066637006045021e-01, + 3.33783015830718454708e-01, 3.30470981379163586400e-01, + 3.27173842813601400970e-01, 3.23891482376391093290e-01, + 3.20623784956905355514e-01, 3.17370638029913609834e-01, + 3.14131931596337177215e-01, 3.10907558126286509559e-01, + 3.07697412504292056035e-01, 3.04501391976649993243e-01, + 3.01319396100803049698e-01, 2.98151326696685481377e-01, 
+ 2.94997087799961810184e-01, 2.91856585617095209972e-01, + 2.88729728482182923521e-01, 2.85616426815501756042e-01, + 2.82516593083707578948e-01, 2.79430141761637940157e-01, + 2.76356989295668320494e-01, 2.73297054068577072172e-01, + 2.70250256365875463072e-01, 2.67216518343561471038e-01, + 2.64195763997261190426e-01, 2.61187919132721213522e-01, + 2.58192911337619235290e-01, 2.55210669954661961700e-01, + 2.52241126055942177508e-01, 2.49284212418528522415e-01, + 2.46339863501263828249e-01, 2.43408015422750312329e-01, + 2.40488605940500588254e-01, 2.37581574431238090606e-01, + 2.34686861872330010392e-01, 2.31804410824338724684e-01, + 2.28934165414680340644e-01, 2.26076071322380278694e-01, + 2.23230075763917484855e-01, 2.20396127480151998723e-01, + 2.17574176724331130872e-01, 2.14764175251173583536e-01, + 2.11966076307030182324e-01, 2.09179834621125076977e-01, + 2.06405406397880797353e-01, 2.03642749310334908452e-01, + 2.00891822494656591136e-01, 1.98152586545775138971e-01, + 1.95425003514134304483e-01, 1.92709036903589175926e-01, + 1.90004651670464985713e-01, 1.87311814223800304768e-01, + 1.84630492426799269756e-01, 1.81960655599522513892e-01, + 1.79302274522847582272e-01, 1.76655321443734858455e-01, + 1.74019770081838553999e-01, 1.71395595637505754327e-01, + 1.68782774801211288285e-01, 1.66181285764481906364e-01, + 1.63591108232365584074e-01, 1.61012223437511009516e-01, + 1.58444614155924284882e-01, 1.55888264724479197465e-01, + 1.53343161060262855866e-01, 1.50809290681845675763e-01, + 1.48286642732574552861e-01, 1.45775208005994028060e-01, + 1.43274978973513461566e-01, 1.40785949814444699690e-01, + 1.38308116448550733057e-01, 1.35841476571253755301e-01, + 1.33386029691669155683e-01, 1.30941777173644358090e-01, + 1.28508722279999570981e-01, 1.26086870220185887081e-01, + 1.23676228201596571932e-01, 1.21276805484790306533e-01, + 1.18888613442910059947e-01, 1.16511665625610869035e-01, + 1.14145977827838487895e-01, 1.11791568163838089811e-01, + 
1.09448457146811797824e-01, 1.07116667774683801961e-01, + 1.04796225622487068629e-01, 1.02487158941935246892e-01, + 1.00189498768810017482e-01, 9.79032790388624646338e-02, + 9.56285367130089991594e-02, 9.33653119126910124859e-02, + 9.11136480663737591268e-02, 8.88735920682758862021e-02, + 8.66451944505580717859e-02, 8.44285095703534715916e-02, + 8.22235958132029043366e-02, 8.00305158146630696292e-02, + 7.78493367020961224423e-02, 7.56801303589271778804e-02, + 7.35229737139813238622e-02, 7.13779490588904025339e-02, + 6.92451443970067553879e-02, 6.71246538277884968737e-02, + 6.50165779712428976156e-02, 6.29210244377581412456e-02, + 6.08381083495398780614e-02, 5.87679529209337372930e-02, + 5.67106901062029017391e-02, 5.46664613248889208474e-02, + 5.26354182767921896513e-02, 5.06177238609477817000e-02, + 4.86135532158685421122e-02, 4.66230949019303814174e-02, + 4.46465522512944634759e-02, 4.26841449164744590750e-02, + 4.07361106559409394401e-02, 3.88027074045261474722e-02, + 3.68842156885673053135e-02, 3.49809414617161251737e-02, + 3.30932194585785779961e-02, 3.12214171919203004046e-02, + 2.93659397581333588001e-02, 2.75272356696031131329e-02, + 2.57058040085489103443e-02, 2.39022033057958785407e-02, + 2.21170627073088502113e-02, 2.03510962300445102935e-02, + 1.86051212757246224594e-02, 1.68800831525431419000e-02, + 1.51770883079353092332e-02, 1.34974506017398673818e-02, + 1.18427578579078790488e-02, 1.02149714397014590439e-02, + 8.61658276939872638800e-03, 7.05087547137322242369e-03, + 5.52240329925099155545e-03, 4.03797259336302356153e-03, + 2.60907274610215926189e-03, 1.26028593049859797236e-03}; + +static const uint32_t ki_float[] = { + 0x007799ECUL, 0x00000000UL, 0x006045F5UL, 0x006D1AA8UL, 0x00728FB4UL, + 0x007592AFUL, 0x00777A5CUL, 0x0078CA38UL, 0x0079BF6BUL, 0x007A7A35UL, + 0x007B0D2FUL, 0x007B83D4UL, 0x007BE597UL, 0x007C3788UL, 0x007C7D33UL, + 0x007CB926UL, 0x007CED48UL, 0x007D1B08UL, 0x007D437FUL, 0x007D678BUL, + 0x007D87DBUL, 0x007DA4FCUL, 0x007DBF61UL, 
0x007DD767UL, 0x007DED5DUL, + 0x007E0183UL, 0x007E1411UL, 0x007E2534UL, 0x007E3515UL, 0x007E43D5UL, + 0x007E5193UL, 0x007E5E67UL, 0x007E6A69UL, 0x007E75AAUL, 0x007E803EUL, + 0x007E8A32UL, 0x007E9395UL, 0x007E9C72UL, 0x007EA4D5UL, 0x007EACC6UL, + 0x007EB44EUL, 0x007EBB75UL, 0x007EC243UL, 0x007EC8BCUL, 0x007ECEE8UL, + 0x007ED4CCUL, 0x007EDA6BUL, 0x007EDFCBUL, 0x007EE4EFUL, 0x007EE9DCUL, + 0x007EEE94UL, 0x007EF31BUL, 0x007EF774UL, 0x007EFBA0UL, 0x007EFFA3UL, + 0x007F037FUL, 0x007F0736UL, 0x007F0ACAUL, 0x007F0E3CUL, 0x007F118FUL, + 0x007F14C4UL, 0x007F17DCUL, 0x007F1ADAUL, 0x007F1DBDUL, 0x007F2087UL, + 0x007F233AUL, 0x007F25D7UL, 0x007F285DUL, 0x007F2AD0UL, 0x007F2D2EUL, + 0x007F2F7AUL, 0x007F31B3UL, 0x007F33DCUL, 0x007F35F3UL, 0x007F37FBUL, + 0x007F39F3UL, 0x007F3BDCUL, 0x007F3DB7UL, 0x007F3F84UL, 0x007F4145UL, + 0x007F42F8UL, 0x007F449FUL, 0x007F463AUL, 0x007F47CAUL, 0x007F494EUL, + 0x007F4AC8UL, 0x007F4C38UL, 0x007F4D9DUL, 0x007F4EF9UL, 0x007F504CUL, + 0x007F5195UL, 0x007F52D5UL, 0x007F540DUL, 0x007F553DUL, 0x007F5664UL, + 0x007F5784UL, 0x007F589CUL, 0x007F59ACUL, 0x007F5AB5UL, 0x007F5BB8UL, + 0x007F5CB3UL, 0x007F5DA8UL, 0x007F5E96UL, 0x007F5F7EUL, 0x007F605FUL, + 0x007F613BUL, 0x007F6210UL, 0x007F62E0UL, 0x007F63AAUL, 0x007F646FUL, + 0x007F652EUL, 0x007F65E8UL, 0x007F669CUL, 0x007F674CUL, 0x007F67F6UL, + 0x007F689CUL, 0x007F693CUL, 0x007F69D9UL, 0x007F6A70UL, 0x007F6B03UL, + 0x007F6B91UL, 0x007F6C1BUL, 0x007F6CA0UL, 0x007F6D21UL, 0x007F6D9EUL, + 0x007F6E17UL, 0x007F6E8CUL, 0x007F6EFCUL, 0x007F6F68UL, 0x007F6FD1UL, + 0x007F7035UL, 0x007F7096UL, 0x007F70F3UL, 0x007F714CUL, 0x007F71A1UL, + 0x007F71F2UL, 0x007F723FUL, 0x007F7289UL, 0x007F72CFUL, 0x007F7312UL, + 0x007F7350UL, 0x007F738BUL, 0x007F73C3UL, 0x007F73F6UL, 0x007F7427UL, + 0x007F7453UL, 0x007F747CUL, 0x007F74A1UL, 0x007F74C3UL, 0x007F74E0UL, + 0x007F74FBUL, 0x007F7511UL, 0x007F7524UL, 0x007F7533UL, 0x007F753FUL, + 0x007F7546UL, 0x007F754AUL, 0x007F754BUL, 0x007F7547UL, 0x007F753FUL, + 0x007F7534UL, 
0x007F7524UL, 0x007F7511UL, 0x007F74F9UL, 0x007F74DEUL, + 0x007F74BEUL, 0x007F749AUL, 0x007F7472UL, 0x007F7445UL, 0x007F7414UL, + 0x007F73DFUL, 0x007F73A5UL, 0x007F7366UL, 0x007F7323UL, 0x007F72DAUL, + 0x007F728DUL, 0x007F723AUL, 0x007F71E3UL, 0x007F7186UL, 0x007F7123UL, + 0x007F70BBUL, 0x007F704DUL, 0x007F6FD9UL, 0x007F6F5FUL, 0x007F6EDFUL, + 0x007F6E58UL, 0x007F6DCBUL, 0x007F6D37UL, 0x007F6C9CUL, 0x007F6BF9UL, + 0x007F6B4FUL, 0x007F6A9CUL, 0x007F69E2UL, 0x007F691FUL, 0x007F6854UL, + 0x007F677FUL, 0x007F66A1UL, 0x007F65B8UL, 0x007F64C6UL, 0x007F63C8UL, + 0x007F62C0UL, 0x007F61ABUL, 0x007F608AUL, 0x007F5F5DUL, 0x007F5E21UL, + 0x007F5CD8UL, 0x007F5B7FUL, 0x007F5A17UL, 0x007F589EUL, 0x007F5713UL, + 0x007F5575UL, 0x007F53C4UL, 0x007F51FEUL, 0x007F5022UL, 0x007F4E2FUL, + 0x007F4C22UL, 0x007F49FAUL, 0x007F47B6UL, 0x007F4553UL, 0x007F42CFUL, + 0x007F4028UL, 0x007F3D5AUL, 0x007F3A64UL, 0x007F3741UL, 0x007F33EDUL, + 0x007F3065UL, 0x007F2CA4UL, 0x007F28A4UL, 0x007F245FUL, 0x007F1FCEUL, + 0x007F1AEAUL, 0x007F15A9UL, 0x007F1000UL, 0x007F09E4UL, 0x007F0346UL, + 0x007EFC16UL, 0x007EF43EUL, 0x007EEBA8UL, 0x007EE237UL, 0x007ED7C8UL, + 0x007ECC2FUL, 0x007EBF37UL, 0x007EB09DUL, 0x007EA00AUL, 0x007E8D0DUL, + 0x007E7710UL, 0x007E5D47UL, 0x007E3E93UL, 0x007E1959UL, 0x007DEB2CUL, + 0x007DB036UL, 0x007D6203UL, 0x007CF4B9UL, 0x007C4FD2UL, 0x007B3630UL, + 0x0078D2D2UL}; + +static const float wi_float[] = { + 4.66198677960027669255e-07f, 2.56588335019207033255e-08f, + 3.41146697750176784592e-08f, 4.00230311410932959821e-08f, + 4.47179475877737745459e-08f, 4.86837785973537366722e-08f, + 5.21562578925932412861e-08f, 5.52695199001886257153e-08f, + 5.81078488992733116465e-08f, 6.07279932024587421409e-08f, + 6.31701613261172047795e-08f, 6.54639842900233842742e-08f, + 6.76319905583641815324e-08f, 6.96917493470166688656e-08f, + 7.16572544283857476692e-08f, 7.35398519048393832969e-08f, + 7.53488822443557479279e-08f, 7.70921367281667127885e-08f, + 7.87761895947956022626e-08f, 
8.04066446825615346857e-08f, + 8.19883218760237408659e-08f, 8.35254002936857088917e-08f, + 8.50215298165053411740e-08f, 8.64799190652369040985e-08f, + 8.79034055989140110861e-08f, 8.92945125124233511541e-08f, + 9.06554945027956262312e-08f, 9.19883756905278607229e-08f, + 9.32949809202232869780e-08f, 9.45769618559625849039e-08f, + 9.58358188855612866442e-08f, 9.70729196232813152662e-08f, + 9.82895146313061088986e-08f, 9.94867508514382224721e-08f, + 1.00665683139461669691e-07f, 1.01827284217853923044e-07f, + 1.02972453302539369464e-07f, 1.04102023612124921572e-07f, + 1.05216768930574060431e-07f, 1.06317409364335657741e-07f, + 1.07404616410877866490e-07f, 1.08479017436113134283e-07f, + 1.09541199642370962438e-07f, 1.10591713595628691212e-07f, + 1.11631076370069356306e-07f, 1.12659774359245895023e-07f, + 1.13678265795837113569e-07f, 1.14686983015899673063e-07f, + 1.15686334498432158725e-07f, 1.16676706706789039179e-07f, + 1.17658465754873988919e-07f, 1.18631958917986203582e-07f, + 1.19597516005596215528e-07f, 1.20555450611113917226e-07f, + 1.21506061251817163689e-07f, 1.22449632410483948386e-07f, + 1.23386435488872536840e-07f, 1.24316729681986364321e-07f, + 1.25240762781015530062e-07f, 1.26158771911939892267e-07f, + 1.27070984215989333455e-07f, 1.27977617477468922011e-07f, + 1.28878880703854958297e-07f, 1.29774974662539874521e-07f, + 1.30666092378141980504e-07f, 1.31552419593887221722e-07f, + 1.32434135200211397569e-07f, 1.33311411633413359243e-07f, + 1.34184415246907777059e-07f, 1.35053306657377859830e-07f, + 1.35918241067904315860e-07f, 1.36779368569952053923e-07f, + 1.37636834425917531047e-07f, 1.38490779333783508675e-07f, + 1.39341339675287344817e-07f, 1.40188647748881762555e-07f, + 1.41032831988654882776e-07f, 1.41874017170273235693e-07f, + 1.42712324604921442006e-07f, 1.43547872322127921816e-07f, + 1.44380775242292721080e-07f, 1.45211145339665544509e-07f, + 1.46039091796461362146e-07f, 1.46864721148745476208e-07f, + 1.47688137424670065700e-07f, 
1.48509442275598857119e-07f, + 1.49328735100614641423e-07f, 1.50146113164867617390e-07f, + 1.50961671712187416111e-07f, 1.51775504072350982845e-07f, + 1.52587701763369746341e-07f, 1.53398354589133671168e-07f, + 1.54207550732725568797e-07f, 1.55015376845697999657e-07f, + 1.55821918133584372604e-07f, 1.56627258437898192833e-07f, + 1.57431480314857468671e-07f, 1.58234665111056041043e-07f, + 1.59036893036289199880e-07f, 1.59838243233728855017e-07f, + 1.60638793847630850137e-07f, 1.61438622088746393909e-07f, + 1.62237804297600106296e-07f, 1.63036416005787357730e-07f, + 1.63834531995435479082e-07f, 1.64632226356965902954e-07f, + 1.65429572545287097020e-07f, 1.66226643434541294491e-07f, + 1.67023511371523209274e-07f, 1.67820248227882200051e-07f, + 1.68616925451215588827e-07f, 1.69413614115155757272e-07f, + 1.70210384968549673733e-07f, 1.71007308483826142122e-07f, + 1.71804454904642543391e-07f, 1.72601894292900061024e-07f, + 1.73399696575213681990e-07f, 1.74197931588920988271e-07f, + 1.74996669127712165834e-07f, 1.75795978986961275677e-07f, + 1.76595931008838063924e-07f, 1.77396595127278238022e-07f, + 1.78198041412889183130e-07f, 1.79000340117867431104e-07f, + 1.79803561721004406185e-07f, 1.80607776972855859813e-07f, + 1.81413056941151359868e-07f, 1.82219473056520464354e-07f, + 1.83027097158612474240e-07f, 1.83836001542687613069e-07f, + 1.84646259006759307383e-07f, 1.85457942899367347876e-07f, + 1.86271127168064649331e-07f, 1.87085886408701333260e-07f, + 1.87902295915592424729e-07f, 1.88720431732658022414e-07f, + 1.89540370705627262627e-07f, 1.90362190535400839128e-07f, + 1.91185969832669990437e-07f, 1.92011788173893651535e-07f, + 1.92839726158739913768e-07f, 1.93669865469102145482e-07f, + 1.94502288929804890433e-07f, 1.95337080571120616772e-07f, + 1.96174325693223683314e-07f, 1.97014110932714374919e-07f, + 1.97856524331352952716e-07f, 1.98701655407150388211e-07f, + 1.99549595227971635348e-07f, 2.00400436487814600236e-07f, + 2.01254273585938820883e-07f, 
2.02111202709026498408e-07f, + 2.02971321916571014951e-07f, 2.03834731229698846698e-07f, + 2.04701532723644121196e-07f, 2.05571830624108885378e-07f, + 2.06445731407757185541e-07f, 2.07323343907107312957e-07f, + 2.08204779420104330037e-07f, 2.09090151824673600213e-07f, + 2.09979577698577670508e-07f, 2.10873176444920111011e-07f, + 2.11771070423665379388e-07f, 2.12673385089569268965e-07f, + 2.13580249136944118603e-07f, 2.14491794651713402832e-07f, + 2.15408157271244625533e-07f, 2.16329476352486921685e-07f, + 2.17255895148978920488e-07f, 2.18187560997337924713e-07f, + 2.19124625513888206785e-07f, 2.20067244802139479285e-07f, + 2.21015579671883851683e-07f, 2.21969795870742159701e-07f, + 2.22930064329060010376e-07f, 2.23896561419128954210e-07f, + 2.24869469229791575583e-07f, 2.25848975857580322189e-07f, + 2.26835275715640744118e-07f, 2.27828569861799901001e-07f, + 2.28829066347263833069e-07f, 2.29836980587561823183e-07f, + 2.30852535757505260518e-07f, 2.31875963212094114516e-07f, + 2.32907502935486642699e-07f, 2.33947404020352726160e-07f, + 2.34995925180156140289e-07f, 2.36053335297164516378e-07f, + 2.37119914009265667728e-07f, 2.38195952338983970691e-07f, + 2.39281753368440712742e-07f, 2.40377632964396957621e-07f, + 2.41483920557958384709e-07f, 2.42600959984018662258e-07f, + 2.43729110386077326413e-07f, 2.44868747192698939290e-07f, + 2.46020263172594533433e-07f, 2.47184069576113545901e-07f, + 2.48360597371852893654e-07f, 2.49550298588131851232e-07f, + 2.50753647770270890721e-07f, 2.51971143565970967140e-07f, + 2.53203310452642767375e-07f, 2.54450700622322097890e-07f, + 2.55713896041856770961e-07f, 2.56993510708419870887e-07f, + 2.58290193123138874550e-07f, 2.59604629008804833146e-07f, + 2.60937544301314385690e-07f, 2.62289708448800566945e-07f, + 2.63661938057441759882e-07f, 2.65055100928844238758e-07f, + 2.66470120540847889467e-07f, 2.67907981031821866252e-07f, + 2.69369732758258246335e-07f, 2.70856498507068313229e-07f, + 2.72369480457841388042e-07f, 
2.73909968006952220135e-07f, + 2.75479346585437289399e-07f, 2.77079107626811561009e-07f, + 2.78710859870496796972e-07f, 2.80376342222588603820e-07f, + 2.82077438439999912690e-07f, 2.83816193958769527230e-07f, + 2.85594835255375795814e-07f, 2.87415792215003905739e-07f, + 2.89281724087851835900e-07f, 2.91195549750371467233e-07f, + 2.93160483161771875581e-07f, 2.95180075129332912389e-07f, + 2.97258262785797916083e-07f, 2.99399428561531794298e-07f, + 3.01608470935804138388e-07f, 3.03890889921758510417e-07f, + 3.06252891144972267537e-07f, 3.08701513613258141075e-07f, + 3.11244787989714509378e-07f, 3.13891934589336184321e-07f, + 3.16653613755314681314e-07f, 3.19542246256559459667e-07f, + 3.22572428717978242099e-07f, 3.25761480217458181578e-07f, + 3.29130173358915628534e-07f, 3.32703730345002116955e-07f, + 3.36513208964639108346e-07f, 3.40597478255417943913e-07f, + 3.45006114675213401550e-07f, 3.49803789521323211592e-07f, + 3.55077180848341416206e-07f, 3.60946392031859609868e-07f, + 3.67584959507244041831e-07f, 3.75257645787954431030e-07f, + 3.84399301057791926300e-07f, 3.95804015855768440983e-07f, + 4.11186015434435801956e-07f, 4.35608969373823260746e-07f}; + +static const float fi_float[] = { + 1.00000000000000000000e+00f, 9.77101701267671596263e-01f, + 9.59879091800106665211e-01f, 9.45198953442299649730e-01f, + 9.32060075959230460718e-01f, 9.19991505039347012840e-01f, + 9.08726440052130879366e-01f, 8.98095921898343418910e-01f, + 8.87984660755833377088e-01f, 8.78309655808917399966e-01f, + 8.69008688036857046555e-01f, 8.60033621196331532488e-01f, + 8.51346258458677951353e-01f, 8.42915653112204177333e-01f, + 8.34716292986883434679e-01f, 8.26726833946221373317e-01f, + 8.18929191603702366642e-01f, 8.11307874312656274185e-01f, + 8.03849483170964274059e-01f, 7.96542330422958966274e-01f, + 7.89376143566024590648e-01f, 7.82341832654802504798e-01f, + 7.75431304981187174974e-01f, 7.68637315798486264740e-01f, + 7.61953346836795386565e-01f, 7.55373506507096115214e-01f, + 
7.48892447219156820459e-01f, 7.42505296340151055290e-01f, + 7.36207598126862650112e-01f, 7.29995264561476231435e-01f, + 7.23864533468630222401e-01f, 7.17811932630721960535e-01f, + 7.11834248878248421200e-01f, 7.05928501332754310127e-01f, + 7.00091918136511615067e-01f, 6.94321916126116711609e-01f, + 6.88616083004671808432e-01f, 6.82972161644994857355e-01f, + 6.77388036218773526009e-01f, 6.71861719897082099173e-01f, + 6.66391343908750100056e-01f, 6.60975147776663107813e-01f, + 6.55611470579697264149e-01f, 6.50298743110816701574e-01f, + 6.45035480820822293424e-01f, 6.39820277453056585060e-01f, + 6.34651799287623608059e-01f, 6.29528779924836690007e-01f, + 6.24450015547026504592e-01f, 6.19414360605834324325e-01f, + 6.14420723888913888899e-01f, 6.09468064925773433949e-01f, + 6.04555390697467776029e-01f, 5.99681752619125263415e-01f, + 5.94846243767987448159e-01f, 5.90047996332826008015e-01f, + 5.85286179263371453274e-01f, 5.80559996100790898232e-01f, + 5.75868682972353718164e-01f, 5.71211506735253227163e-01f, + 5.66587763256164445025e-01f, 5.61996775814524340831e-01f, + 5.57437893618765945014e-01f, 5.52910490425832290562e-01f, + 5.48413963255265812791e-01f, 5.43947731190026262382e-01f, + 5.39511234256952132426e-01f, 5.35103932380457614215e-01f, + 5.30725304403662057062e-01f, 5.26374847171684479008e-01f, + 5.22052074672321841931e-01f, 5.17756517229756352272e-01f, + 5.13487720747326958914e-01f, 5.09245245995747941592e-01f, + 5.05028667943468123624e-01f, 5.00837575126148681903e-01f, + 4.96671569052489714213e-01f, 4.92530263643868537748e-01f, + 4.88413284705458028423e-01f, 4.84320269426683325253e-01f, + 4.80250865909046753544e-01f, 4.76204732719505863248e-01f, + 4.72181538467730199660e-01f, 4.68180961405693596422e-01f, + 4.64202689048174355069e-01f, 4.60246417812842867345e-01f, + 4.56311852678716434184e-01f, 4.52398706861848520777e-01f, + 4.48506701507203064949e-01f, 4.44635565395739396077e-01f, + 4.40785034665803987508e-01f, 4.36954852547985550526e-01f, + 
4.33144769112652261445e-01f, 4.29354541029441427735e-01f, + 4.25583931338021970170e-01f, 4.21832709229495894654e-01f, + 4.18100649837848226120e-01f, 4.14387534040891125642e-01f, + 4.10693148270188157500e-01f, 4.07017284329473372217e-01f, + 4.03359739221114510510e-01f, 3.99720314980197222177e-01f, + 3.96098818515832451492e-01f, 3.92495061459315619512e-01f, + 3.88908860018788715696e-01f, 3.85340034840077283462e-01f, + 3.81788410873393657674e-01f, 3.78253817245619183840e-01f, + 3.74736087137891138443e-01f, 3.71235057668239498696e-01f, + 3.67750569779032587814e-01f, 3.64282468129004055601e-01f, + 3.60830600989648031529e-01f, 3.57394820145780500731e-01f, + 3.53974980800076777232e-01f, 3.50570941481406106455e-01f, + 3.47182563956793643900e-01f, 3.43809713146850715049e-01f, + 3.40452257044521866547e-01f, 3.37110066637006045021e-01f, + 3.33783015830718454708e-01f, 3.30470981379163586400e-01f, + 3.27173842813601400970e-01f, 3.23891482376391093290e-01f, + 3.20623784956905355514e-01f, 3.17370638029913609834e-01f, + 3.14131931596337177215e-01f, 3.10907558126286509559e-01f, + 3.07697412504292056035e-01f, 3.04501391976649993243e-01f, + 3.01319396100803049698e-01f, 2.98151326696685481377e-01f, + 2.94997087799961810184e-01f, 2.91856585617095209972e-01f, + 2.88729728482182923521e-01f, 2.85616426815501756042e-01f, + 2.82516593083707578948e-01f, 2.79430141761637940157e-01f, + 2.76356989295668320494e-01f, 2.73297054068577072172e-01f, + 2.70250256365875463072e-01f, 2.67216518343561471038e-01f, + 2.64195763997261190426e-01f, 2.61187919132721213522e-01f, + 2.58192911337619235290e-01f, 2.55210669954661961700e-01f, + 2.52241126055942177508e-01f, 2.49284212418528522415e-01f, + 2.46339863501263828249e-01f, 2.43408015422750312329e-01f, + 2.40488605940500588254e-01f, 2.37581574431238090606e-01f, + 2.34686861872330010392e-01f, 2.31804410824338724684e-01f, + 2.28934165414680340644e-01f, 2.26076071322380278694e-01f, + 2.23230075763917484855e-01f, 2.20396127480151998723e-01f, + 
2.17574176724331130872e-01f, 2.14764175251173583536e-01f, + 2.11966076307030182324e-01f, 2.09179834621125076977e-01f, + 2.06405406397880797353e-01f, 2.03642749310334908452e-01f, + 2.00891822494656591136e-01f, 1.98152586545775138971e-01f, + 1.95425003514134304483e-01f, 1.92709036903589175926e-01f, + 1.90004651670464985713e-01f, 1.87311814223800304768e-01f, + 1.84630492426799269756e-01f, 1.81960655599522513892e-01f, + 1.79302274522847582272e-01f, 1.76655321443734858455e-01f, + 1.74019770081838553999e-01f, 1.71395595637505754327e-01f, + 1.68782774801211288285e-01f, 1.66181285764481906364e-01f, + 1.63591108232365584074e-01f, 1.61012223437511009516e-01f, + 1.58444614155924284882e-01f, 1.55888264724479197465e-01f, + 1.53343161060262855866e-01f, 1.50809290681845675763e-01f, + 1.48286642732574552861e-01f, 1.45775208005994028060e-01f, + 1.43274978973513461566e-01f, 1.40785949814444699690e-01f, + 1.38308116448550733057e-01f, 1.35841476571253755301e-01f, + 1.33386029691669155683e-01f, 1.30941777173644358090e-01f, + 1.28508722279999570981e-01f, 1.26086870220185887081e-01f, + 1.23676228201596571932e-01f, 1.21276805484790306533e-01f, + 1.18888613442910059947e-01f, 1.16511665625610869035e-01f, + 1.14145977827838487895e-01f, 1.11791568163838089811e-01f, + 1.09448457146811797824e-01f, 1.07116667774683801961e-01f, + 1.04796225622487068629e-01f, 1.02487158941935246892e-01f, + 1.00189498768810017482e-01f, 9.79032790388624646338e-02f, + 9.56285367130089991594e-02f, 9.33653119126910124859e-02f, + 9.11136480663737591268e-02f, 8.88735920682758862021e-02f, + 8.66451944505580717859e-02f, 8.44285095703534715916e-02f, + 8.22235958132029043366e-02f, 8.00305158146630696292e-02f, + 7.78493367020961224423e-02f, 7.56801303589271778804e-02f, + 7.35229737139813238622e-02f, 7.13779490588904025339e-02f, + 6.92451443970067553879e-02f, 6.71246538277884968737e-02f, + 6.50165779712428976156e-02f, 6.29210244377581412456e-02f, + 6.08381083495398780614e-02f, 5.87679529209337372930e-02f, + 
5.67106901062029017391e-02f, 5.46664613248889208474e-02f, + 5.26354182767921896513e-02f, 5.06177238609477817000e-02f, + 4.86135532158685421122e-02f, 4.66230949019303814174e-02f, + 4.46465522512944634759e-02f, 4.26841449164744590750e-02f, + 4.07361106559409394401e-02f, 3.88027074045261474722e-02f, + 3.68842156885673053135e-02f, 3.49809414617161251737e-02f, + 3.30932194585785779961e-02f, 3.12214171919203004046e-02f, + 2.93659397581333588001e-02f, 2.75272356696031131329e-02f, + 2.57058040085489103443e-02f, 2.39022033057958785407e-02f, + 2.21170627073088502113e-02f, 2.03510962300445102935e-02f, + 1.86051212757246224594e-02f, 1.68800831525431419000e-02f, + 1.51770883079353092332e-02f, 1.34974506017398673818e-02f, + 1.18427578579078790488e-02f, 1.02149714397014590439e-02f, + 8.61658276939872638800e-03f, 7.05087547137322242369e-03f, + 5.52240329925099155545e-03f, 4.03797259336302356153e-03f, + 2.60907274610215926189e-03f, 1.26028593049859797236e-03f}; + +static const uint64_t ke_double[] = { + 0x001C5214272497C6, 0x0000000000000000, 0x00137D5BD79C317E, + 0x00186EF58E3F3C10, 0x001A9BB7320EB0AE, 0x001BD127F719447C, + 0x001C951D0F88651A, 0x001D1BFE2D5C3972, 0x001D7E5BD56B18B2, + 0x001DC934DD172C70, 0x001E0409DFAC9DC8, 0x001E337B71D47836, + 0x001E5A8B177CB7A2, 0x001E7B42096F046C, 0x001E970DAF08AE3E, + 0x001EAEF5B14EF09E, 0x001EC3BD07B46556, 0x001ED5F6F08799CE, + 0x001EE614AE6E5688, 0x001EF46ECA361CD0, 0x001F014B76DDD4A4, + 0x001F0CE313A796B6, 0x001F176369F1F77A, 0x001F20F20C452570, + 0x001F29AE1951A874, 0x001F31B18FB95532, 0x001F39125157C106, + 0x001F3FE2EB6E694C, 0x001F463332D788FA, 0x001F4C10BF1D3A0E, + 0x001F51874C5C3322, 0x001F56A109C3ECC0, 0x001F5B66D9099996, + 0x001F5FE08210D08C, 0x001F6414DD445772, 0x001F6809F6859678, + 0x001F6BC52A2B02E6, 0x001F6F4B3D32E4F4, 0x001F72A07190F13A, + 0x001F75C8974D09D6, 0x001F78C71B045CC0, 0x001F7B9F12413FF4, + 0x001F7E5346079F8A, 0x001F80E63BE21138, 0x001F835A3DAD9162, + 0x001F85B16056B912, 0x001F87ED89B24262, 0x001F8A10759374FA, + 
0x001F8C1BBA3D39AC, 0x001F8E10CC45D04A, 0x001F8FF102013E16, + 0x001F91BD968358E0, 0x001F9377AC47AFD8, 0x001F95204F8B64DA, + 0x001F96B878633892, 0x001F98410C968892, 0x001F99BAE146BA80, + 0x001F9B26BC697F00, 0x001F9C85561B717A, 0x001F9DD759CFD802, + 0x001F9F1D6761A1CE, 0x001FA058140936C0, 0x001FA187EB3A3338, + 0x001FA2AD6F6BC4FC, 0x001FA3C91ACE0682, 0x001FA4DB5FEE6AA2, + 0x001FA5E4AA4D097C, 0x001FA6E55EE46782, 0x001FA7DDDCA51EC4, + 0x001FA8CE7CE6A874, 0x001FA9B793CE5FEE, 0x001FAA9970ADB858, + 0x001FAB745E588232, 0x001FAC48A3740584, 0x001FAD1682BF9FE8, + 0x001FADDE3B5782C0, 0x001FAEA008F21D6C, 0x001FAF5C2418B07E, + 0x001FB012C25B7A12, 0x001FB0C41681DFF4, 0x001FB17050B6F1FA, + 0x001FB2179EB2963A, 0x001FB2BA2BDFA84A, 0x001FB358217F4E18, + 0x001FB3F1A6C9BE0C, 0x001FB486E10CACD6, 0x001FB517F3C793FC, + 0x001FB5A500C5FDAA, 0x001FB62E2837FE58, 0x001FB6B388C9010A, + 0x001FB7353FB50798, 0x001FB7B368DC7DA8, 0x001FB82E1ED6BA08, + 0x001FB8A57B0347F6, 0x001FB919959A0F74, 0x001FB98A85BA7204, + 0x001FB9F861796F26, 0x001FBA633DEEE286, 0x001FBACB2F41EC16, + 0x001FBB3048B49144, 0x001FBB929CAEA4E2, 0x001FBBF23CC8029E, + 0x001FBC4F39D22994, 0x001FBCA9A3E140D4, 0x001FBD018A548F9E, + 0x001FBD56FBDE729C, 0x001FBDAA068BD66A, 0x001FBDFAB7CB3F40, + 0x001FBE491C7364DE, 0x001FBE9540C9695E, 0x001FBEDF3086B128, + 0x001FBF26F6DE6174, 0x001FBF6C9E828AE2, 0x001FBFB031A904C4, + 0x001FBFF1BA0FFDB0, 0x001FC03141024588, 0x001FC06ECF5B54B2, + 0x001FC0AA6D8B1426, 0x001FC0E42399698A, 0x001FC11BF9298A64, + 0x001FC151F57D1942, 0x001FC1861F770F4A, 0x001FC1B87D9E74B4, + 0x001FC1E91620EA42, 0x001FC217EED505DE, 0x001FC2450D3C83FE, + 0x001FC27076864FC2, 0x001FC29A2F90630E, 0x001FC2C23CE98046, + 0x001FC2E8A2D2C6B4, 0x001FC30D654122EC, 0x001FC33087DE9C0E, + 0x001FC3520E0B7EC6, 0x001FC371FADF66F8, 0x001FC390512A2886, + 0x001FC3AD137497FA, 0x001FC3C844013348, 0x001FC3E1E4CCAB40, + 0x001FC3F9F78E4DA8, 0x001FC4107DB85060, 0x001FC4257877FD68, + 0x001FC438E8B5BFC6, 0x001FC44ACF15112A, 0x001FC45B2BF447E8, + 
0x001FC469FF6C4504, 0x001FC477495001B2, 0x001FC483092BFBB8, + 0x001FC48D3E457FF6, 0x001FC495E799D21A, 0x001FC49D03DD30B0, + 0x001FC4A29179B432, 0x001FC4A68E8E07FC, 0x001FC4A8F8EBFB8C, + 0x001FC4A9CE16EA9E, 0x001FC4A90B41FA34, 0x001FC4A6AD4E28A0, + 0x001FC4A2B0C82E74, 0x001FC49D11E62DE2, 0x001FC495CC852DF4, + 0x001FC48CDC265EC0, 0x001FC4823BEC237A, 0x001FC475E696DEE6, + 0x001FC467D6817E82, 0x001FC458059DC036, 0x001FC4466D702E20, + 0x001FC433070BCB98, 0x001FC41DCB0D6E0E, 0x001FC406B196BBF6, + 0x001FC3EDB248CB62, 0x001FC3D2C43E593C, 0x001FC3B5DE0591B4, + 0x001FC396F599614C, 0x001FC376005A4592, 0x001FC352F3069370, + 0x001FC32DC1B22818, 0x001FC3065FBD7888, 0x001FC2DCBFCBF262, + 0x001FC2B0D3B99F9E, 0x001FC2828C8FFCF0, 0x001FC251DA79F164, + 0x001FC21EACB6D39E, 0x001FC1E8F18C6756, 0x001FC1B09637BB3C, + 0x001FC17586DCCD10, 0x001FC137AE74D6B6, 0x001FC0F6F6BB2414, + 0x001FC0B348184DA4, 0x001FC06C898BAFF0, 0x001FC022A092F364, + 0x001FBFD5710F72B8, 0x001FBF84DD29488E, 0x001FBF30C52FC60A, + 0x001FBED907770CC6, 0x001FBE7D80327DDA, 0x001FBE1E094BA614, + 0x001FBDBA7A354408, 0x001FBD52A7B9F826, 0x001FBCE663C6201A, + 0x001FBC757D2C4DE4, 0x001FBBFFBF63B7AA, 0x001FBB84F23FE6A2, + 0x001FBB04D9A0D18C, 0x001FBA7F351A70AC, 0x001FB9F3BF92B618, + 0x001FB9622ED4ABFC, 0x001FB8CA33174A16, 0x001FB82B76765B54, + 0x001FB7859C5B895C, 0x001FB6D840D55594, 0x001FB622F7D96942, + 0x001FB5654C6F37E0, 0x001FB49EBFBF69D2, 0x001FB3CEC803E746, + 0x001FB2F4CF539C3E, 0x001FB21032442852, 0x001FB1203E5A9604, + 0x001FB0243042E1C2, 0x001FAF1B31C479A6, 0x001FAE045767E104, + 0x001FACDE9DBF2D72, 0x001FABA8E640060A, 0x001FAA61F399FF28, + 0x001FA908656F66A2, 0x001FA79AB3508D3C, 0x001FA61726D1F214, + 0x001FA47BD48BEA00, 0x001FA2C693C5C094, 0x001FA0F4F47DF314, + 0x001F9F04336BBE0A, 0x001F9CF12B79F9BC, 0x001F9AB84415ABC4, + 0x001F98555B782FB8, 0x001F95C3ABD03F78, 0x001F92FDA9CEF1F2, + 0x001F8FFCDA9AE41C, 0x001F8CB99E7385F8, 0x001F892AEC479606, + 0x001F8545F904DB8E, 0x001F80FDC336039A, 0x001F7C427839E926, + 
0x001F7700A3582ACC, 0x001F71200F1A241C, 0x001F6A8234B7352A, + 0x001F630000A8E266, 0x001F5A66904FE3C4, 0x001F50724ECE1172, + 0x001F44C7665C6FDA, 0x001F36E5A38A59A2, 0x001F26143450340A, + 0x001F113E047B0414, 0x001EF6AEFA57CBE6, 0x001ED38CA188151E, + 0x001EA2A61E122DB0, 0x001E5961C78B267C, 0x001DDDF62BAC0BB0, + 0x001CDB4DD9E4E8C0}; + +static const double we_double[] = { + 9.655740063209182975e-16, 7.089014243955414331e-18, + 1.163941249669122378e-17, 1.524391512353216015e-17, + 1.833284885723743916e-17, 2.108965109464486630e-17, + 2.361128077843138196e-17, 2.595595772310893952e-17, + 2.816173554197752338e-17, 3.025504130321382330e-17, + 3.225508254836375280e-17, 3.417632340185027033e-17, + 3.602996978734452488e-17, 3.782490776869649048e-17, + 3.956832198097553231e-17, 4.126611778175946428e-17, + 4.292321808442525631e-17, 4.454377743282371417e-17, + 4.613133981483185932e-17, 4.768895725264635940e-17, + 4.921928043727962847e-17, 5.072462904503147014e-17, + 5.220704702792671737e-17, 5.366834661718192181e-17, + 5.511014372835094717e-17, 5.653388673239667134e-17, + 5.794088004852766616e-17, 5.933230365208943081e-17, + 6.070922932847179572e-17, 6.207263431163193485e-17, + 6.342341280303076511e-17, 6.476238575956142121e-17, + 6.609030925769405241e-17, 6.740788167872722244e-17, + 6.871574991183812442e-17, 7.001451473403929616e-17, + 7.130473549660643409e-17, 7.258693422414648352e-17, + 7.386159921381791997e-17, 7.512918820723728089e-17, + 7.639013119550825792e-17, 7.764483290797848102e-17, + 7.889367502729790548e-17, 8.013701816675454434e-17, + 8.137520364041762206e-17, 8.260855505210038174e-17, + 8.383737972539139383e-17, 8.506196999385323132e-17, + 8.628260436784112996e-17, 8.749954859216182511e-17, + 8.871305660690252281e-17, 8.992337142215357066e-17, + 9.113072591597909173e-17, 9.233534356381788123e-17, + 9.353743910649128938e-17, 9.473721916312949566e-17, + 9.593488279457997317e-17, 9.713062202221521206e-17, + 9.832462230649511362e-17, 9.951706298915071878e-17, + 
1.007081177024294931e-16, 1.018979547484694078e-16, + 1.030867374515421954e-16, 1.042746244856188556e-16, + 1.054617701794576406e-16, 1.066483248011914702e-16, + 1.078344348241948498e-16, 1.090202431758350473e-16, + 1.102058894705578110e-16, 1.113915102286197502e-16, + 1.125772390816567488e-16, 1.137632069661684705e-16, + 1.149495423059009298e-16, 1.161363711840218308e-16, + 1.173238175059045788e-16, 1.185120031532669434e-16, + 1.197010481303465158e-16, 1.208910707027385520e-16, + 1.220821875294706151e-16, 1.232745137888415193e-16, + 1.244681632985112523e-16, 1.256632486302898513e-16, + 1.268598812200397542e-16, 1.280581714730749379e-16, + 1.292582288654119552e-16, 1.304601620412028847e-16, + 1.316640789066572582e-16, 1.328700867207380889e-16, + 1.340782921828999433e-16, 1.352888015181175458e-16, + 1.365017205594397770e-16, 1.377171548282880964e-16, + 1.389352096127063919e-16, 1.401559900437571538e-16, + 1.413796011702485188e-16, 1.426061480319665444e-16, + 1.438357357315790180e-16, 1.450684695053687684e-16, + 1.463044547929475721e-16, 1.475437973060951633e-16, + 1.487866030968626066e-16, 1.500329786250736949e-16, + 1.512830308253539427e-16, 1.525368671738125550e-16, + 1.537945957544996933e-16, 1.550563253257577148e-16, + 1.563221653865837505e-16, 1.575922262431176140e-16, + 1.588666190753684151e-16, 1.601454560042916733e-16, + 1.614288501593278662e-16, 1.627169157465130500e-16, + 1.640097681172717950e-16, 1.653075238380036909e-16, + 1.666103007605742067e-16, 1.679182180938228863e-16, + 1.692313964762022267e-16, 1.705499580496629830e-16, + 1.718740265349031656e-16, 1.732037273081008369e-16, + 1.745391874792533975e-16, 1.758805359722491379e-16, + 1.772279036068006489e-16, 1.785814231823732619e-16, + 1.799412295642463721e-16, 1.813074597718501559e-16, + 1.826802530695252266e-16, 1.840597510598587828e-16, + 1.854460977797569461e-16, 1.868394397994192684e-16, + 1.882399263243892051e-16, 1.896477093008616722e-16, + 1.910629435244376536e-16, 1.924857867525243818e-16, + 
1.939163998205899420e-16, 1.953549467624909132e-16, + 1.968015949351037382e-16, 1.982565151475019047e-16, + 1.997198817949342081e-16, 2.011918729978734671e-16, + 2.026726707464198289e-16, 2.041624610503588774e-16, + 2.056614340951917875e-16, 2.071697844044737034e-16, + 2.086877110088159721e-16, 2.102154176219292789e-16, + 2.117531128241075913e-16, 2.133010102535779087e-16, + 2.148593288061663316e-16, 2.164282928437604723e-16, + 2.180081324120784027e-16, 2.195990834682870728e-16, + 2.212013881190495942e-16, 2.228152948696180545e-16, + 2.244410588846308588e-16, 2.260789422613173739e-16, + 2.277292143158621037e-16, 2.293921518837311354e-16, + 2.310680396348213318e-16, 2.327571704043534613e-16, + 2.344598455404957859e-16, 2.361763752697773994e-16, + 2.379070790814276700e-16, 2.396522861318623520e-16, + 2.414123356706293277e-16, 2.431875774892255956e-16, + 2.449783723943070217e-16, 2.467850927069288738e-16, + 2.486081227895851719e-16, 2.504478596029557040e-16, + 2.523047132944217013e-16, 2.541791078205812227e-16, + 2.560714816061770759e-16, 2.579822882420530896e-16, + 2.599119972249746917e-16, 2.618610947423924219e-16, + 2.638300845054942823e-16, 2.658194886341845120e-16, + 2.678298485979525166e-16, 2.698617262169488933e-16, + 2.719157047279818500e-16, 2.739923899205814823e-16, + 2.760924113487617126e-16, 2.782164236246436081e-16, + 2.803651078006983464e-16, 2.825391728480253184e-16, + 2.847393572388174091e-16, 2.869664306419817679e-16, + 2.892211957417995598e-16, 2.915044901905293183e-16, + 2.938171887070028633e-16, 2.961602053345465687e-16, + 2.985344958730045276e-16, 3.009410605012618141e-16, + 3.033809466085003416e-16, 3.058552518544860874e-16, + 3.083651274815310004e-16, 3.109117819034266344e-16, + 3.134964845996663118e-16, 3.161205703467105734e-16, + 3.187854438219713117e-16, 3.214925846206797361e-16, + 3.242435527309451638e-16, 3.270399945182240440e-16, + 3.298836492772283149e-16, 3.327763564171671408e-16, + 3.357200633553244075e-16, 3.387168342045505162e-16, + 
3.417688593525636996e-16, 3.448784660453423890e-16, + 3.480481301037442286e-16, 3.512804889222979418e-16, + 3.545783559224791863e-16, 3.579447366604276541e-16, + 3.613828468219060593e-16, 3.648961323764542545e-16, + 3.684882922095621322e-16, 3.721633036080207290e-16, + 3.759254510416256532e-16, 3.797793587668874387e-16, + 3.837300278789213687e-16, 3.877828785607895292e-16, + 3.919437984311428867e-16, 3.962191980786774996e-16, + 4.006160751056541688e-16, 4.051420882956573177e-16, + 4.098056438903062509e-16, 4.146159964290904582e-16, + 4.195833672073398926e-16, 4.247190841824385048e-16, + 4.300357481667470702e-16, 4.355474314693952008e-16, + 4.412699169036069903e-16, 4.472209874259932285e-16, + 4.534207798565834480e-16, 4.598922204905932469e-16, + 4.666615664711475780e-16, 4.737590853262492027e-16, + 4.812199172829237933e-16, 4.890851827392209900e-16, + 4.974034236191939753e-16, 5.062325072144159699e-16, + 5.156421828878082953e-16, 5.257175802022274839e-16, + 5.365640977112021618e-16, 5.483144034258703912e-16, + 5.611387454675159622e-16, 5.752606481503331688e-16, + 5.909817641652102998e-16, 6.087231416180907671e-16, + 6.290979034877557049e-16, 6.530492053564040799e-16, + 6.821393079028928626e-16, 7.192444966089361564e-16, + 7.706095350032096755e-16, 8.545517038584027421e-16}; + +static const double fe_double[] = { + 1.000000000000000000e+00, 9.381436808621747003e-01, + 9.004699299257464817e-01, 8.717043323812035949e-01, + 8.477855006239896074e-01, 8.269932966430503241e-01, + 8.084216515230083777e-01, 7.915276369724956185e-01, + 7.759568520401155522e-01, 7.614633888498962833e-01, + 7.478686219851951034e-01, 7.350380924314234843e-01, + 7.228676595935720206e-01, 7.112747608050760117e-01, + 7.001926550827881623e-01, 6.895664961170779872e-01, + 6.793505722647653622e-01, 6.695063167319247333e-01, + 6.600008410789997004e-01, 6.508058334145710999e-01, + 6.418967164272660897e-01, 6.332519942143660652e-01, + 6.248527387036659775e-01, 6.166821809152076561e-01, + 
6.087253820796220127e-01, 6.009689663652322267e-01, + 5.934009016917334289e-01, 5.860103184772680329e-01, + 5.787873586028450257e-01, 5.717230486648258170e-01, + 5.648091929124001709e-01, 5.580382822625874484e-01, + 5.514034165406412891e-01, 5.448982376724396115e-01, + 5.385168720028619127e-01, 5.322538802630433219e-01, + 5.261042139836197284e-01, 5.200631773682335979e-01, + 5.141263938147485613e-01, 5.082897764106428795e-01, + 5.025495018413477233e-01, 4.969019872415495476e-01, + 4.913438695940325340e-01, 4.858719873418849144e-01, + 4.804833639304542103e-01, 4.751751930373773747e-01, + 4.699448252839599771e-01, 4.647897562504261781e-01, + 4.597076156421376902e-01, 4.546961574746155033e-01, + 4.497532511627549967e-01, 4.448768734145485126e-01, + 4.400651008423538957e-01, 4.353161032156365740e-01, + 4.306281372884588343e-01, 4.259995411430343437e-01, + 4.214287289976165751e-01, 4.169141864330028757e-01, + 4.124544659971611793e-01, 4.080481831520323954e-01, + 4.036940125305302773e-01, 3.993906844752310725e-01, + 3.951369818332901573e-01, 3.909317369847971069e-01, + 3.867738290841376547e-01, 3.826621814960098344e-01, + 3.785957594095807899e-01, 3.745735676159021588e-01, + 3.705946484351460013e-01, 3.666580797815141568e-01, + 3.627629733548177748e-01, 3.589084729487497794e-01, + 3.550937528667874599e-01, 3.513180164374833381e-01, + 3.475804946216369817e-01, 3.438804447045024082e-01, + 3.402171490667800224e-01, 3.365899140286776059e-01, + 3.329980687618089852e-01, 3.294409642641363267e-01, + 3.259179723935561879e-01, 3.224284849560891675e-01, + 3.189719128449572394e-01, 3.155476852271289490e-01, + 3.121552487741795501e-01, 3.087940669345601852e-01, + 3.054636192445902565e-01, 3.021634006756935276e-01, + 2.988929210155817917e-01, 2.956517042812611962e-01, + 2.924392881618925744e-01, 2.892552234896777485e-01, + 2.860990737370768255e-01, 2.829704145387807457e-01, + 2.798688332369729248e-01, 2.767939284485173568e-01, + 2.737453096528029706e-01, 2.707225967990600224e-01, + 
2.677254199320447947e-01, 2.647534188350622042e-01, + 2.618062426893629779e-01, 2.588835497490162285e-01, + 2.559850070304153791e-01, 2.531102900156294577e-01, + 2.502590823688622956e-01, 2.474310756653276266e-01, + 2.446259691318921070e-01, 2.418434693988772144e-01, + 2.390832902624491774e-01, 2.363451524570596429e-01, + 2.336287834374333461e-01, 2.309339171696274118e-01, + 2.282602939307167011e-01, 2.256076601166840667e-01, + 2.229757680581201940e-01, 2.203643758433594946e-01, + 2.177732471487005272e-01, 2.152021510753786837e-01, + 2.126508619929782795e-01, 2.101191593889882581e-01, + 2.076068277242220372e-01, 2.051136562938377095e-01, + 2.026394390937090173e-01, 2.001839746919112650e-01, + 1.977470661050988732e-01, 1.953285206795632167e-01, + 1.929281499767713515e-01, 1.905457696631953912e-01, + 1.881811994042543179e-01, 1.858342627621971110e-01, + 1.835047870977674633e-01, 1.811926034754962889e-01, + 1.788975465724783054e-01, 1.766194545904948843e-01, + 1.743581691713534942e-01, 1.721135353153200598e-01, + 1.698854013025276610e-01, 1.676736186172501919e-01, + 1.654780418749360049e-01, 1.632985287519018169e-01, + 1.611349399175920349e-01, 1.589871389693142123e-01, + 1.568549923693652315e-01, 1.547383693844680830e-01, + 1.526371420274428570e-01, 1.505511850010398944e-01, + 1.484803756438667910e-01, 1.464245938783449441e-01, + 1.443837221606347754e-01, 1.423576454324722018e-01, + 1.403462510748624548e-01, 1.383494288635802039e-01, + 1.363670709264288572e-01, 1.343990717022136294e-01, + 1.324453279013875218e-01, 1.305057384683307731e-01, + 1.285802045452281717e-01, 1.266686294375106714e-01, + 1.247709185808309612e-01, 1.228869795095451356e-01, + 1.210167218266748335e-01, 1.191600571753276827e-01, + 1.173168992115555670e-01, 1.154871635786335338e-01, + 1.136707678827443141e-01, 1.118676316700562973e-01, + 1.100776764051853845e-01, 1.083008254510337970e-01, + 1.065370040500016602e-01, 1.047861393065701724e-01, + 1.030481601712577161e-01, 1.013229974259536315e-01, + 
9.961058367063713170e-02, 9.791085331149219917e-02, + 9.622374255043279756e-02, 9.454918937605585882e-02, + 9.288713355604354127e-02, 9.123751663104015530e-02, + 8.960028191003285847e-02, 8.797537446727021759e-02, + 8.636274114075691288e-02, 8.476233053236811865e-02, + 8.317409300963238272e-02, 8.159798070923741931e-02, + 8.003394754231990538e-02, 7.848194920160642130e-02, + 7.694194317048050347e-02, 7.541388873405840965e-02, + 7.389774699236474620e-02, 7.239348087570873780e-02, + 7.090105516237182881e-02, 6.942043649872875477e-02, + 6.795159342193660135e-02, 6.649449638533977414e-02, + 6.504911778675374900e-02, 6.361543199980733421e-02, + 6.219341540854099459e-02, 6.078304644547963265e-02, + 5.938430563342026597e-02, 5.799717563120065922e-02, + 5.662164128374287675e-02, 5.525768967669703741e-02, + 5.390531019604608703e-02, 5.256449459307169225e-02, + 5.123523705512628146e-02, 4.991753428270637172e-02, + 4.861138557337949667e-02, 4.731679291318154762e-02, + 4.603376107617516977e-02, 4.476229773294328196e-02, + 4.350241356888818328e-02, 4.225412241331623353e-02, + 4.101744138041481941e-02, 3.979239102337412542e-02, + 3.857899550307485742e-02, 3.737728277295936097e-02, + 3.618728478193142251e-02, 3.500903769739741045e-02, + 3.384258215087432992e-02, 3.268796350895953468e-02, + 3.154523217289360859e-02, 3.041444391046660423e-02, + 2.929566022463739317e-02, 2.818894876397863569e-02, + 2.709438378095579969e-02, 2.601204664513421735e-02, + 2.494202641973178314e-02, 2.388442051155817078e-02, + 2.283933540638524023e-02, 2.180688750428358066e-02, + 2.078720407257811723e-02, 1.978042433800974303e-02, + 1.878670074469603046e-02, 1.780620041091136169e-02, + 1.683910682603994777e-02, 1.588562183997316302e-02, + 1.494596801169114850e-02, 1.402039140318193759e-02, + 1.310916493125499106e-02, 1.221259242625538123e-02, + 1.133101359783459695e-02, 1.046481018102997894e-02, + 9.614413642502209895e-03, 8.780314985808975251e-03, + 7.963077438017040002e-03, 7.163353183634983863e-03, + 
6.381905937319179087e-03, 5.619642207205483020e-03, + 4.877655983542392333e-03, 4.157295120833795314e-03, + 3.460264777836904049e-03, 2.788798793574076128e-03, + 2.145967743718906265e-03, 1.536299780301572356e-03, + 9.672692823271745359e-04, 4.541343538414967652e-04}; + +static const uint32_t ke_float[] = { + 0x00714851UL, 0x00000000UL, 0x004DF56FUL, 0x0061BBD6UL, 0x006A6EDDUL, + 0x006F44A0UL, 0x00725474UL, 0x00746FF9UL, 0x0075F96FUL, 0x007724D3UL, + 0x00781027UL, 0x0078CDEEUL, 0x00796A2CUL, 0x0079ED08UL, 0x007A5C37UL, + 0x007ABBD7UL, 0x007B0EF4UL, 0x007B57DCUL, 0x007B9853UL, 0x007BD1BBUL, + 0x007C052EUL, 0x007C338CUL, 0x007C5D8EUL, 0x007C83C8UL, 0x007CA6B8UL, + 0x007CC6C6UL, 0x007CE449UL, 0x007CFF8CUL, 0x007D18CDUL, 0x007D3043UL, + 0x007D461DUL, 0x007D5A84UL, 0x007D6D9BUL, 0x007D7F82UL, 0x007D9053UL, + 0x007DA028UL, 0x007DAF15UL, 0x007DBD2DUL, 0x007DCA82UL, 0x007DD722UL, + 0x007DE31CUL, 0x007DEE7CUL, 0x007DF94DUL, 0x007E0399UL, 0x007E0D69UL, + 0x007E16C6UL, 0x007E1FB6UL, 0x007E2842UL, 0x007E306FUL, 0x007E3843UL, + 0x007E3FC4UL, 0x007E46F6UL, 0x007E4DDFUL, 0x007E5481UL, 0x007E5AE2UL, + 0x007E6104UL, 0x007E66ECUL, 0x007E6C9BUL, 0x007E7215UL, 0x007E775DUL, + 0x007E7C76UL, 0x007E8160UL, 0x007E8620UL, 0x007E8AB6UL, 0x007E8F24UL, + 0x007E936DUL, 0x007E9793UL, 0x007E9B95UL, 0x007E9F77UL, 0x007EA33AUL, + 0x007EA6DEUL, 0x007EAA66UL, 0x007EADD1UL, 0x007EB123UL, 0x007EB45AUL, + 0x007EB779UL, 0x007EBA80UL, 0x007EBD71UL, 0x007EC04BUL, 0x007EC310UL, + 0x007EC5C1UL, 0x007EC85EUL, 0x007ECAE9UL, 0x007ECD61UL, 0x007ECFC7UL, + 0x007ED21CUL, 0x007ED460UL, 0x007ED694UL, 0x007ED8B9UL, 0x007EDACEUL, + 0x007EDCD5UL, 0x007EDECEUL, 0x007EE0B8UL, 0x007EE296UL, 0x007EE466UL, + 0x007EE62AUL, 0x007EE7E2UL, 0x007EE98DUL, 0x007EEB2DUL, 0x007EECC1UL, + 0x007EEE4AUL, 0x007EEFC9UL, 0x007EF13DUL, 0x007EF2A7UL, 0x007EF406UL, + 0x007EF55CUL, 0x007EF6A8UL, 0x007EF7EBUL, 0x007EF924UL, 0x007EFA55UL, + 0x007EFB7DUL, 0x007EFC9CUL, 0x007EFDB2UL, 0x007EFEC1UL, 0x007EFFC7UL, + 0x007F00C5UL, 0x007F01BBUL, 
0x007F02AAUL, 0x007F0391UL, 0x007F0470UL, + 0x007F0548UL, 0x007F0618UL, 0x007F06E2UL, 0x007F07A4UL, 0x007F0860UL, + 0x007F0914UL, 0x007F09C2UL, 0x007F0A69UL, 0x007F0B09UL, 0x007F0BA3UL, + 0x007F0C36UL, 0x007F0CC2UL, 0x007F0D48UL, 0x007F0DC8UL, 0x007F0E41UL, + 0x007F0EB4UL, 0x007F0F21UL, 0x007F0F88UL, 0x007F0FE8UL, 0x007F1042UL, + 0x007F1096UL, 0x007F10E4UL, 0x007F112BUL, 0x007F116DUL, 0x007F11A8UL, + 0x007F11DDUL, 0x007F120CUL, 0x007F1235UL, 0x007F1258UL, 0x007F1274UL, + 0x007F128AUL, 0x007F129AUL, 0x007F12A4UL, 0x007F12A7UL, 0x007F12A4UL, + 0x007F129BUL, 0x007F128BUL, 0x007F1274UL, 0x007F1257UL, 0x007F1233UL, + 0x007F1209UL, 0x007F11D8UL, 0x007F119FUL, 0x007F1160UL, 0x007F111AUL, + 0x007F10CCUL, 0x007F1077UL, 0x007F101BUL, 0x007F0FB7UL, 0x007F0F4BUL, + 0x007F0ED7UL, 0x007F0E5CUL, 0x007F0DD8UL, 0x007F0D4CUL, 0x007F0CB7UL, + 0x007F0C19UL, 0x007F0B73UL, 0x007F0AC3UL, 0x007F0A0AUL, 0x007F0947UL, + 0x007F087BUL, 0x007F07A4UL, 0x007F06C2UL, 0x007F05D6UL, 0x007F04DFUL, + 0x007F03DCUL, 0x007F02CDUL, 0x007F01B2UL, 0x007F008BUL, 0x007EFF56UL, + 0x007EFE13UL, 0x007EFCC3UL, 0x007EFB64UL, 0x007EF9F6UL, 0x007EF878UL, + 0x007EF6EAUL, 0x007EF54BUL, 0x007EF39AUL, 0x007EF1D6UL, 0x007EEFFFUL, + 0x007EEE14UL, 0x007EEC13UL, 0x007EE9FDUL, 0x007EE7CFUL, 0x007EE589UL, + 0x007EE329UL, 0x007EE0AEUL, 0x007EDE16UL, 0x007EDB61UL, 0x007ED88CUL, + 0x007ED595UL, 0x007ED27BUL, 0x007ECF3BUL, 0x007ECBD3UL, 0x007EC841UL, + 0x007EC481UL, 0x007EC091UL, 0x007EBC6DUL, 0x007EB811UL, 0x007EB37AUL, + 0x007EAEA4UL, 0x007EA988UL, 0x007EA422UL, 0x007E9E6BUL, 0x007E985DUL, + 0x007E91EFUL, 0x007E8B1AUL, 0x007E83D4UL, 0x007E7C11UL, 0x007E73C5UL, + 0x007E6AE1UL, 0x007E6155UL, 0x007E570FUL, 0x007E4BF7UL, 0x007E3FF3UL, + 0x007E32E6UL, 0x007E24ACUL, 0x007E1518UL, 0x007E03F7UL, 0x007DF10AUL, + 0x007DDC03UL, 0x007DC480UL, 0x007DAA09UL, 0x007D8C00UL, 0x007D699AUL, + 0x007D41C9UL, 0x007D131EUL, 0x007CDB97UL, 0x007C9851UL, 0x007C44F8UL, + 0x007BDABCUL, 0x007B4E33UL, 0x007A8A98UL, 0x00796587UL, 0x007777D9UL, + 
0x00736D37UL, +}; +static const float we_float[] = { + 1.03677719e-06F, 7.61177108e-09F, 1.24977240e-08F, 1.63680292e-08F, + 1.96847466e-08F, 2.26448404e-08F, 2.53524197e-08F, 2.78699974e-08F, + 3.02384333e-08F, 3.24861032e-08F, 3.46336312e-08F, 3.66965478e-08F, + 3.86868855e-08F, 4.06141855e-08F, 4.24861622e-08F, 4.43091566e-08F, + 4.60884545e-08F, 4.78285168e-08F, 4.95331490e-08F, 5.12056279e-08F, + 5.28488000e-08F, 5.44651557e-08F, 5.60568899e-08F, 5.76259484e-08F, + 5.91740662e-08F, 6.07027987e-08F, 6.22135462e-08F, 6.37075759e-08F, + 6.51860386e-08F, 6.66499836e-08F, 6.81003709e-08F, 6.95380822e-08F, + 7.09639292e-08F, 7.23786618e-08F, 7.37829746e-08F, 7.51775128e-08F, + 7.65628768e-08F, 7.79396272e-08F, 7.93082883e-08F, 8.06693516e-08F, + 8.20232788e-08F, 8.33705045e-08F, 8.47114385e-08F, 8.60464681e-08F, + 8.73759596e-08F, 8.87002606e-08F, 9.00197010e-08F, 9.13345948e-08F, + 9.26452410e-08F, 9.39519249e-08F, 9.52549192e-08F, 9.65544849e-08F, + 9.78508719e-08F, 9.91443202e-08F, 1.00435060e-07F, 1.01723315e-07F, + 1.03009296e-07F, 1.04293211e-07F, 1.05575259e-07F, 1.06855633e-07F, + 1.08134518e-07F, 1.09412096e-07F, 1.10688542e-07F, 1.11964025e-07F, + 1.13238713e-07F, 1.14512767e-07F, 1.15786343e-07F, 1.17059595e-07F, + 1.18332673e-07F, 1.19605723e-07F, 1.20878890e-07F, 1.22152313e-07F, + 1.23426131e-07F, 1.24700479e-07F, 1.25975490e-07F, 1.27251294e-07F, + 1.28528022e-07F, 1.29805799e-07F, 1.31084751e-07F, 1.32365001e-07F, + 1.33646673e-07F, 1.34929886e-07F, 1.36214760e-07F, 1.37501415e-07F, + 1.38789966e-07F, 1.40080532e-07F, 1.41373228e-07F, 1.42668169e-07F, + 1.43965470e-07F, 1.45265245e-07F, 1.46567606e-07F, 1.47872669e-07F, + 1.49180545e-07F, 1.50491348e-07F, 1.51805191e-07F, 1.53122186e-07F, + 1.54442445e-07F, 1.55766083e-07F, 1.57093212e-07F, 1.58423946e-07F, + 1.59758399e-07F, 1.61096684e-07F, 1.62438917e-07F, 1.63785214e-07F, + 1.65135690e-07F, 1.66490462e-07F, 1.67849647e-07F, 1.69213364e-07F, + 1.70581733e-07F, 1.71954874e-07F, 1.73332908e-07F, 
1.74715958e-07F, + 1.76104148e-07F, 1.77497602e-07F, 1.78896448e-07F, 1.80300814e-07F, + 1.81710828e-07F, 1.83126623e-07F, 1.84548331e-07F, 1.85976086e-07F, + 1.87410026e-07F, 1.88850288e-07F, 1.90297012e-07F, 1.91750343e-07F, + 1.93210424e-07F, 1.94677403e-07F, 1.96151428e-07F, 1.97632653e-07F, + 1.99121231e-07F, 2.00617321e-07F, 2.02121082e-07F, 2.03632677e-07F, + 2.05152273e-07F, 2.06680040e-07F, 2.08216149e-07F, 2.09760777e-07F, + 2.11314104e-07F, 2.12876312e-07F, 2.14447590e-07F, 2.16028129e-07F, + 2.17618123e-07F, 2.19217773e-07F, 2.20827283e-07F, 2.22446862e-07F, + 2.24076723e-07F, 2.25717086e-07F, 2.27368174e-07F, 2.29030216e-07F, + 2.30703448e-07F, 2.32388110e-07F, 2.34084450e-07F, 2.35792720e-07F, + 2.37513182e-07F, 2.39246101e-07F, 2.40991752e-07F, 2.42750416e-07F, + 2.44522382e-07F, 2.46307948e-07F, 2.48107418e-07F, 2.49921109e-07F, + 2.51749342e-07F, 2.53592452e-07F, 2.55450781e-07F, 2.57324683e-07F, + 2.59214522e-07F, 2.61120673e-07F, 2.63043524e-07F, 2.64983476e-07F, + 2.66940939e-07F, 2.68916342e-07F, 2.70910123e-07F, 2.72922739e-07F, + 2.74954660e-07F, 2.77006373e-07F, 2.79078382e-07F, 2.81171210e-07F, + 2.83285396e-07F, 2.85421503e-07F, 2.87580110e-07F, 2.89761822e-07F, + 2.91967265e-07F, 2.94197089e-07F, 2.96451969e-07F, 2.98732610e-07F, + 3.01039742e-07F, 3.03374127e-07F, 3.05736557e-07F, 3.08127859e-07F, + 3.10548894e-07F, 3.13000563e-07F, 3.15483804e-07F, 3.17999599e-07F, + 3.20548974e-07F, 3.23133003e-07F, 3.25752811e-07F, 3.28409576e-07F, + 3.31104534e-07F, 3.33838984e-07F, 3.36614287e-07F, 3.39431878e-07F, + 3.42293264e-07F, 3.45200034e-07F, 3.48153864e-07F, 3.51156520e-07F, + 3.54209871e-07F, 3.57315892e-07F, 3.60476673e-07F, 3.63694431e-07F, + 3.66971518e-07F, 3.70310433e-07F, 3.73713834e-07F, 3.77184553e-07F, + 3.80725611e-07F, 3.84340234e-07F, 3.88031877e-07F, 3.91804239e-07F, + 3.95661291e-07F, 3.99607304e-07F, 4.03646879e-07F, 4.07784981e-07F, + 4.12026980e-07F, 4.16378695e-07F, 4.20846449e-07F, 4.25437124e-07F, + 4.30158235e-07F, 
4.35018005e-07F, 4.40025460e-07F, 4.45190536e-07F, + 4.50524210e-07F, 4.56038644e-07F, 4.61747369e-07F, 4.67665494e-07F, + 4.73809965e-07F, 4.80199879e-07F, 4.86856855e-07F, 4.93805512e-07F, + 5.01074042e-07F, 5.08694944e-07F, 5.16705952e-07F, 5.25151216e-07F, + 5.34082859e-07F, 5.43563016e-07F, 5.53666578e-07F, 5.64484953e-07F, + 5.76131313e-07F, 5.88748108e-07F, 6.02518140e-07F, 6.17681418e-07F, + 6.34561837e-07F, 6.53611496e-07F, 6.75488730e-07F, 7.01206245e-07F, + 7.32441505e-07F, 7.72282898e-07F, 8.27435688e-07F, 9.17567905e-07F, +}; +static const float fe_float[] = { + 1.00000000e+00F, 9.38143681e-01F, 9.00469930e-01F, 8.71704332e-01F, + 8.47785501e-01F, 8.26993297e-01F, 8.08421652e-01F, 7.91527637e-01F, + 7.75956852e-01F, 7.61463389e-01F, 7.47868622e-01F, 7.35038092e-01F, + 7.22867660e-01F, 7.11274761e-01F, 7.00192655e-01F, 6.89566496e-01F, + 6.79350572e-01F, 6.69506317e-01F, 6.60000841e-01F, 6.50805833e-01F, + 6.41896716e-01F, 6.33251994e-01F, 6.24852739e-01F, 6.16682181e-01F, + 6.08725382e-01F, 6.00968966e-01F, 5.93400902e-01F, 5.86010318e-01F, + 5.78787359e-01F, 5.71723049e-01F, 5.64809193e-01F, 5.58038282e-01F, + 5.51403417e-01F, 5.44898238e-01F, 5.38516872e-01F, 5.32253880e-01F, + 5.26104214e-01F, 5.20063177e-01F, 5.14126394e-01F, 5.08289776e-01F, + 5.02549502e-01F, 4.96901987e-01F, 4.91343870e-01F, 4.85871987e-01F, + 4.80483364e-01F, 4.75175193e-01F, 4.69944825e-01F, 4.64789756e-01F, + 4.59707616e-01F, 4.54696157e-01F, 4.49753251e-01F, 4.44876873e-01F, + 4.40065101e-01F, 4.35316103e-01F, 4.30628137e-01F, 4.25999541e-01F, + 4.21428729e-01F, 4.16914186e-01F, 4.12454466e-01F, 4.08048183e-01F, + 4.03694013e-01F, 3.99390684e-01F, 3.95136982e-01F, 3.90931737e-01F, + 3.86773829e-01F, 3.82662181e-01F, 3.78595759e-01F, 3.74573568e-01F, + 3.70594648e-01F, 3.66658080e-01F, 3.62762973e-01F, 3.58908473e-01F, + 3.55093753e-01F, 3.51318016e-01F, 3.47580495e-01F, 3.43880445e-01F, + 3.40217149e-01F, 3.36589914e-01F, 3.32998069e-01F, 3.29440964e-01F, + 3.25917972e-01F, 
3.22428485e-01F, 3.18971913e-01F, 3.15547685e-01F, + 3.12155249e-01F, 3.08794067e-01F, 3.05463619e-01F, 3.02163401e-01F, + 2.98892921e-01F, 2.95651704e-01F, 2.92439288e-01F, 2.89255223e-01F, + 2.86099074e-01F, 2.82970415e-01F, 2.79868833e-01F, 2.76793928e-01F, + 2.73745310e-01F, 2.70722597e-01F, 2.67725420e-01F, 2.64753419e-01F, + 2.61806243e-01F, 2.58883550e-01F, 2.55985007e-01F, 2.53110290e-01F, + 2.50259082e-01F, 2.47431076e-01F, 2.44625969e-01F, 2.41843469e-01F, + 2.39083290e-01F, 2.36345152e-01F, 2.33628783e-01F, 2.30933917e-01F, + 2.28260294e-01F, 2.25607660e-01F, 2.22975768e-01F, 2.20364376e-01F, + 2.17773247e-01F, 2.15202151e-01F, 2.12650862e-01F, 2.10119159e-01F, + 2.07606828e-01F, 2.05113656e-01F, 2.02639439e-01F, 2.00183975e-01F, + 1.97747066e-01F, 1.95328521e-01F, 1.92928150e-01F, 1.90545770e-01F, + 1.88181199e-01F, 1.85834263e-01F, 1.83504787e-01F, 1.81192603e-01F, + 1.78897547e-01F, 1.76619455e-01F, 1.74358169e-01F, 1.72113535e-01F, + 1.69885401e-01F, 1.67673619e-01F, 1.65478042e-01F, 1.63298529e-01F, + 1.61134940e-01F, 1.58987139e-01F, 1.56854992e-01F, 1.54738369e-01F, + 1.52637142e-01F, 1.50551185e-01F, 1.48480376e-01F, 1.46424594e-01F, + 1.44383722e-01F, 1.42357645e-01F, 1.40346251e-01F, 1.38349429e-01F, + 1.36367071e-01F, 1.34399072e-01F, 1.32445328e-01F, 1.30505738e-01F, + 1.28580205e-01F, 1.26668629e-01F, 1.24770919e-01F, 1.22886980e-01F, + 1.21016722e-01F, 1.19160057e-01F, 1.17316899e-01F, 1.15487164e-01F, + 1.13670768e-01F, 1.11867632e-01F, 1.10077676e-01F, 1.08300825e-01F, + 1.06537004e-01F, 1.04786139e-01F, 1.03048160e-01F, 1.01322997e-01F, + 9.96105837e-02F, 9.79108533e-02F, 9.62237426e-02F, 9.45491894e-02F, + 9.28871336e-02F, 9.12375166e-02F, 8.96002819e-02F, 8.79753745e-02F, + 8.63627411e-02F, 8.47623305e-02F, 8.31740930e-02F, 8.15979807e-02F, + 8.00339475e-02F, 7.84819492e-02F, 7.69419432e-02F, 7.54138887e-02F, + 7.38977470e-02F, 7.23934809e-02F, 7.09010552e-02F, 6.94204365e-02F, + 6.79515934e-02F, 6.64944964e-02F, 6.50491178e-02F, 
6.36154320e-02F, + 6.21934154e-02F, 6.07830464e-02F, 5.93843056e-02F, 5.79971756e-02F, + 5.66216413e-02F, 5.52576897e-02F, 5.39053102e-02F, 5.25644946e-02F, + 5.12352371e-02F, 4.99175343e-02F, 4.86113856e-02F, 4.73167929e-02F, + 4.60337611e-02F, 4.47622977e-02F, 4.35024136e-02F, 4.22541224e-02F, + 4.10174414e-02F, 3.97923910e-02F, 3.85789955e-02F, 3.73772828e-02F, + 3.61872848e-02F, 3.50090377e-02F, 3.38425822e-02F, 3.26879635e-02F, + 3.15452322e-02F, 3.04144439e-02F, 2.92956602e-02F, 2.81889488e-02F, + 2.70943838e-02F, 2.60120466e-02F, 2.49420264e-02F, 2.38844205e-02F, + 2.28393354e-02F, 2.18068875e-02F, 2.07872041e-02F, 1.97804243e-02F, + 1.87867007e-02F, 1.78062004e-02F, 1.68391068e-02F, 1.58856218e-02F, + 1.49459680e-02F, 1.40203914e-02F, 1.31091649e-02F, 1.22125924e-02F, + 1.13310136e-02F, 1.04648102e-02F, 9.61441364e-03F, 8.78031499e-03F, + 7.96307744e-03F, 7.16335318e-03F, 6.38190594e-03F, 5.61964221e-03F, + 4.87765598e-03F, 4.15729512e-03F, 3.46026478e-03F, 2.78879879e-03F, + 2.14596774e-03F, 1.53629978e-03F, 9.67269282e-04F, 4.54134354e-04F, +}; diff --git a/numpy/random/src/dsfmt/128-bit-jump.poly.txt b/numpy/random/src/dsfmt/128-bit-jump.poly.txt new file mode 100644 index 000000000..fea1318fb --- /dev/null +++ b/numpy/random/src/dsfmt/128-bit-jump.poly.txt @@ -0,0 +1,2 @@ +jump polynomial: 
+f4dfa6c62049d0776e0bf6f1e953f3aa38abb113df86be024eab3773ad5f2b82ead936022e656dff7e562691c59dd5f7d2566b78d9669002503c4ddb1888a49f32333f515e6c60c4ecd221078ec6f26f0a90f4875067ca1f399a99775037adf905566e2c7e6b42131420f8f04f112c92621c9b1502f2a8aefad6c667904af62f0d55e02d396902d3b89450103c5ce5fe0408d97cbb864861b49e4e42048ff3310b48faac55095a7f422eea4aade752f947f947c6be0a0c665bdea099246ab9eff658ea8ca468bf49d0227748367878de06d7bd86ea6708fcac6e252f5f00f04309b2aac3036b64afb39d990427c6c9f03477cc7e935c43c0e61bc161db8eb15516eee8cb377ecbc1849207990fb6778721b29bfe0d89bfda1b3772fa5b0b1f7ec3daf36052032285898c6f6396f55010c31f8201b7e2e51d94f920bfe57684c5415cc342cb39a0045d9793d13cf8646096daeb8bb9bfc20a90de8f2426da8733267a9b9674f32154e8f84a9932223a2ca3c787d0b66df6675febbdfcba2f9cef09c621c57e11098b3289c77397aaae8b104642ffe0c4b75598efbc53745984d68b4d6656cae299ae2be55217a9a02b009ca7be32f47fbe434bce4914a34d0c9b0085bede9b8a99319c34660d66f0124b5a7714c4bf3cbfec3ee43ed817087168bad80133bebaeeb68cf7929a24d1bb3de831a8340d220906ab04159cf94b21d5ee813bd7c80f10f01b43052af530917513b169254c25d6fcfe6cb420d6ce92f54886ef6eaf9a5ba35e893ff593834d05ddf28899e42d729c7df3d21ef036020789739366f0c11ec52ff92a0bfd8ba69508e27b20fabb8217bd36b90e5aa918159ac87913bc7b46c04e366c23c92807fbe9c6a407e6a4db0b4fc23c3b6c706b5ca058fe8c190f849f18d16d6b48b5ed760eb202fd566291a799420b9654e08b8118bcbfead8e9dd2fdb9b053e9bdfb665285c78718f726d0b3d6c37e116428ec9ac9db2637259e4e8d6402bbada46c6bdb03985e19a82e9b4e57de1b025a3cb1f850beae7e8da9941655825bce0e89d536b6ee9064865b1a85c185e9fc9cb7f435de13d44773c00eed442a286e4ab807e3cab4dc3441d1b7d2af693812ae8b39652bb8c835fc895d13d6da93541afeadeee450475c29f3b2dfa8ef1c1e2547463b2cc2f0ff7a42ac4dd35e25c4fa030d2d2766fbe9f2d04c1304671747bace2f7dd55142bfa60f8cbc968bfc3d7a342152dc684a0fb5a32c0962a62b5220ac0f72add9d8b84d6cc76b97d03245e01fc8da3414a49bb4075d3488f29b56dc42ba69e3b58529448c943ecfd98b3784a39d0b8609a8fb945e757f4569f53bd2cf80f7f638acf5b67fe9c560a3b7b0cf7e0398f31aa8b03cf9c62b24296b6d8596b694469a02686c38da
a16a1ef86e012d61a2f7de1693a5c00b3685175caec3c67146477eba54830f1d546cb18a553779aa46adb4f2010e33f3def847c7d89b51a8462b227605f6c920fd558a6daf64bc98682e508ae960c0c571870e603ba1fce0c13d53176f353fd319959e13db93eae1359f06e3dd4767c04f824cf34ec7bf8f60161ba1a615db82852eca9e3869afa711ab9a090660b0dc6cfbea310dda77e02310fbaeacd2636f975838c2dbcdbe9ac2cd85cee28f5e3f0c73abf62f9fa02cd79a7606b7ba855db68a07848b057c3aaf38f1a70086e14616f6f88305a1f9ce6b41378a620d4db3e0e7e1d421590dccaeff86212e232eeb5eb8a8d33a8c9b25ae88f3a7bd5032b4efa68f8af3186a02ffcbf5456f12beccace94c81c360cc4a0dcc642b59f991eec68c59af78139ca60b96d6a18e9535f8995e89bd2cf6a0aef3acffd33d1c0c1b79b66414a91d9f65b2b4ec65844b96f725d2b4b0c309f3eb9d714e9dd939bbdfd85ce8fb43679aeab13f6c29549949503c9466dbd337c4cdde46d6eacd15f21f4d8fdeaa627a47884c88a9c85f0b731d271a8ea7cb9e04a4a149c23c10f56b3a0476dc77a999d6e4f813e4b0f805e2a693e2ae4ae0ecc423c9ba5d17b42e691abf83784a582f2b1fd85d1e0a27ba38a500963568b2450363d2c5e3f7b8ba3e5b56e4e9f745a3a710bf2ae233c303068c532ce78ff031e6ab28b705dd94d7db4500909edb5626b8c9bd5ff4f0b4741388f0b91563ee516934c013e901572cba005ac5c535f4f107903be9af7b2793dfb61b5070facbe71eefe1b5600f975c8c38c3a2350d78beadfecb78e981164ae8bc866e732972d3ceef4aac68e15861f9b881d9b51b4edece150bc124b07645defb4202ef5d0e0962db98cae6ed459561c93c74c20bd64362e4f4fffc389a6cd80514604ff22eecc10c9cbc7981d19a8102b24146354c463107c9dc070e29e70df3578022acf72289ef071ab9f9402a544d0399f1b1e5f206b6d46d445f6d612a490e72918e00c853eda8493bef511149e80c9ab56e8b4b8cba3987249f77d060e61760e5792ac321c987c03c2606e9393a7970212992cdbd16448078d5039d4c2c3199714f53278f4f7b1d2e514cf95bdfc078b8bb0db659cb2c3f5cc02890ea84f05d414c88d2db9e9f8455659b9fa6254405317245fa070d6970cafb4dadb2522b490a5c8e02fe973a8cdbfbfbdbfb01535099ffba3d3896bc4d1189fc570c3e6fdc6469265b8da912772e75dd62ab71be507f700d56cac5e68fd6b57ec166168ab5258a69625c142a5b1b3519f94be1bde5e51d3bd8ea0c12d5af2fe4615b1b7bd4a96628a4fabc65925ff09718f63bbebaad98f89bd9543a27b3ff3b5d8bfa89f941a5eb8cc005ccd4a705190e1c9dc6a9f4264e5ee658
520a4438e92de854bffc39f8dc7dfbb5de4f14ba63ea16a37d14a7b4610f95b6cffd55e4679b29cedbdf20e7bd16da822fad910c359ee3a68e48aae6e769b0e291d5d3aa3e2ca9d8d23abe8a1d5349f4991e9300852cc0befb20c2fc0d169306b260763344024f8092cbcc24c6807363e9fc548a30d5faab3a94b2af0782a2942be80c45d8b0587efd587394ef33c33022436e285806ddffdd32fe36345c3c38ed8d680abeb7a028b44ee6f94d060a14c7019bb6af1f1b5f0a562957d19826d8cc216f9b908c989ccd5415e3525dfe9422ffb5b50b7cc3083dc325544751e5683535d7439d3da2b0bb73bea551dd99e04e0e793804f4774eb6b1daf781d9caa5128274e599e847862fe309027813d3e4eda0bbeb7201856a5c5d8370e44dabff0bb229c723ba0a6bcf29c44536147de11b7835991018100105bd4329217f7386903fe8e7363cd7b3e893244e245e0a187467664c05b0be1fd429722b9b9a5e3198147fad72776e8a63aab9054fa9d259af0198d088d71d132e6068676a8e9ebb0f616b51ee34aac39c2c2221c71124017270d75ff4a048363c389e04e9b440ad2032a381ac2cfc54f409caa791e65ee4f5d6cd035008f219b88a803a7382ae447bf65a3df2176b25b3b7b67dabe34decd9a1384dc7a003916ca8fbcb29b3ad6fd8eac5bbbaa3bdfa6c6a3ad9427c4f3ed79fea26e14c8ce5fa3b4f82c5f7b6d2125916753a7b92ce9b46d45 diff --git a/numpy/random/src/dsfmt/96-bit-jump.poly.txt b/numpy/random/src/dsfmt/96-bit-jump.poly.txt new file mode 100644 index 000000000..15c68d155 --- /dev/null +++ b/numpy/random/src/dsfmt/96-bit-jump.poly.txt @@ -0,0 +1,2 @@ +jump polynomial: 
+288da521f7244e5f62bf26692bdd1fcdfd38a850addb02d98bd358367cb78c71348f3e163522e0e30e4feb90aa210dd793c94d151fa89aa319911aa42b511ea568a503d595c6d9bcd37317a37ef5679700a5b67f29df72451770fc1eb8c97427cdd9825c23f32dcd5c4fb117a4f5982a3bee8f16595d021165cd9688db342e360e222c9855c8306fd7b5fc82e62e3b1765e7f3319da9da66c325b030bd6175876efc70636306cd2de31a299ca20e9eb1d5063bcbff0ba282aff4737a5b1585cd940ae9cd45fda222308341e0d5588e81b42c4e0574deeb2d80b84c00cb3f8a7ae6278462e1994b83a25b33aa0dc74d5d3d057dabfd6a8a82d7dfb6bb66a223bc46dca2b5fb1885e6ab80fddcd6578b32c21c4a9d761cb9798800c921d56ee356c491454e15956e68ef566c1706fcdfb6a6828ec1fb93db455525828e8741a371c14959658b99bbd358a162230ee451a8432df279e4ba0d3a493954359a5390b16475540578270b168054fefb1e20948d4d20c88005ed09e671b6a94b8ea929f72e7b2f85af4098a92d742b64964ea6b7773b2c20b22a0ff35bd9367c3160b549411230e15a987f361e04daac49d2fe4c7c9371d84bf270d8f33a62680b2ee014bf5be691aa0d82e66e885eaa832a241aff8a09c435ac2b0698bc3865c5125d498a4ffadd31d5f2c6aee6496fdc6c13055b53e7f65a683ef539b6e8ea6e21429a11ff74ccef09ee83eac1b5ddaf1b77fed786fd20e8cbb3e8877b7f80a24fef31a9f8d8108099c50abc886f7ab20c4396bf51af1b806003185eaf2061f55036e26f498b92aabadfb6b5bed2d116b32ae387a692058e6160a9508dc44c971454d9a357ba115278861be0aeaa0926d773c5d4563e37dffcfed8bbf44b79e384650b7eff30aae73154a2ef130cee1eaf32d944e5472ae217789c03276deb8290c73dd5cde0b6dce9b28cbb73776e3e52260768f37a38db3d1c26c3c065b641c7a825edc155d947118d8b6ff8c8818440088724261ca83fa866aa4159fbffac8c28c8a7ca5f1e2fde74b4908c8215cbde20190bdf0de1d5a05a2c116a66eeadcafd643098e74ec3e82b3658c0c4fd7c7797d567b8db3d6b67ca262d713dbf45cc80b89f279be0991f276a4501d2ea6222026caa7e6fbcf4d02fdf65d8f439f88cfb1121d1b0f4dd670d146b93e099a32869448d837e347229745e5c30f1521b0c477b2062c9c8f631dcd29664eec7f28bdcac2a1ca2deabbbc89b21824ba92a61eeb4c5dd62b20c346134d842bcfc189f0e2244bfb8596c38c88d5bd4447fcd30890a4acf71801a6bcf5d806073b9ca730db53e160a79516333748708dd5e5be37d75e90e64d14ddf5ccc9390ae67cbba39916ce9b3b6b1d19378e4bd36ef0c9e62570394051cc273122
e0790e381b20e083beca6e88bc2fa8bde22c89689b4b710c775cd9065158b48bf855fc3a3e80693908046ea1da9c558f024ea5ea69d65f40317fc3c8bab7606bf7edf17fcaeb706756c4de38319a51fc24c80a2baccef40f4354f5147fb91c9b1b91011d146da7eeb426d25de17d7e39ee53735ef28b64d5e6e5444676f679a8e4e91314377c7606531b90dc1cd8cf2d929ed9137fdc0d6b6db62e764ef200a439d742b51425b818231ed799a53768119466cc42296ce36f52507411709cd9d622c1f899291db27104589a5e4509d9818cef983d6a13ce1247184c4551c38bd319aa068cd9c0b96f9e08b4a7bd7852c129d4126676cbcb30ae029b0e2cec3c58c110fecae7967fca0752874e2fcc077084dd4d87b4dee632b621f35cb9f22639ab4de86089c56cabb1aa8a4fedbc5d8673cca12b4479dca5dc6f3048bb654fd59c464b033960177a4d6c2ee38b124d193cd5da3cbc5aa5ebdf7197f5d3f3008f01effb599b6d238044f2ee204bf372685256813278ca7a7d145445f5d5eb5490d99ee89d587fe446e1600d7daf9553b7d709bd796c59757e2f7c26cb71990a51ffc49120f358ef4927729b7e8c2cf9ad35a8017667625a39e2d12c8b0dd5de3d3797d9167ac9302a122e0f066a2552a7278a632b043089a1f5a127ce0bc8e688c545f20bca467fd58d9f9b8234f2be7a28152ab4d39ba8d6c4471d574571aa1a4a3aca16f46ac74e664899cf24502334aec637a9c392ba1618340080bfaed493abaa2f684ffb2bc1da86d804305772e30893f67235d5ce8180ef60490e46f131465d249a39df1aaed30621169c0f7db0b6a6adcab975ec00ca68b2fc06f443cfb8933b326f106670e0e528f22ff96d06b2a6d917258d29a87cf637f37c3661b42462c11c73c1cd0a6774e4e10157b02c0cfd9148ad8a23775653db3dec41fd6d1d5eb54a3e5e144a489a18412f21eb96a72b6751482882d636a1cd0057ea48d1a985472b0c301c4a5b24b0152bdc50a47126a01a1d0665270cdbf2ed852e0f61515bad6e58973329d269bf057ffa52331dde4700b926f635cdf49234f4aaabbabca5386526568fc3a80b1d8a9e1566e21bf889546379289263d978e95de390c4bbdb5a813535b7186f661f5c283400adb3bcf9365d45abb6088b729a98789265f1e8e684e2b2c15b4ce53230abc5e5bf6895827c95842e0849fc7fe56b7c65f075baded0f2e172c8e1088219615b8697ff4a3c6f5f708e498e6c7312680f214a877061511d3b85087545fc26708d039e977a0157b95ceba40497cc2dd1b1b1394eb98f651d701dfbc3583c159da8bae76a25db213211d6191d83c5d7a3d4b2320b8a305b5d90e04d6e3de161fff5dae7e3d4f6c9e1cf55cb5ac1677d9cfd560db2209be2b9566a26
7aad9cf4b9e03c221ba5b0f02cabb50cef19180ba329691d114da670d7b85e36c0b6b7d81613bd31350dfe225050861e90043ac2334478f52584a1a8809f76d40af36da02549c54da164487f0b7f823cff797db1c90f2f1c431eca97fc8bcdfb44c30cd1643c893d5e33aa56cbc0a0c38f5c8f6bb37483d13b85b659ac51ce05311bba19c97772e81c2315a009e80f591c82445d493dc3b5fb12c52e8c50c6260200b0d77092bf19aabce491b2c511fca2a4ebd99b446e55f453314297723894f0b223868ef80f5964468afa3a5e6aa41f2128e6de893bba2cbde9bea91ba97bda18a01905ce8d2e85e6011cc0550f5ae9121361fdec1ec97a6e6892a68732a69147476d54babaa564b883baad7100eb1092a1aa28a29f67e6b53deab511e53eada87cfe1bb6e3c6a215fd8d729840a5b5ac452cfd8acb9be7818d2c806c3e0cedd0847ddf9a5906bf1a0fa4da44ccea829a1a5350d5db0241a221b97e5cd5ba15e58b402529317c854fbcda86a562d4ee44a34193513647af0a3bc9f90ababc1dbbfd9aba8d3dcc39463473ca6bc0e1dc736ba712eef42dee80e31e7d8abe23f98e91ab875d0553bc24be9cb1d9484812c0b038cb177ad52064328e17f8ca7c8737902d964017e3aaae66161270dac21de42a6f60d10d89c1556916a249a130752bb7c7783b93a59d9f5456745ecc512f497b5a31be2678b9587628cb45dae2f5f6bde7ea4500c1ba961e2 diff --git a/numpy/random/src/dsfmt/LICENSE.md b/numpy/random/src/dsfmt/LICENSE.md new file mode 100644 index 000000000..d59568f6b --- /dev/null +++ b/numpy/random/src/dsfmt/LICENSE.md @@ -0,0 +1,34 @@ +# DSFMT + +Copyright (c) 2007, 2008, 2009 Mutsuo Saito, Makoto Matsumoto +and Hiroshima University. +Copyright (c) 2011, 2002 Mutsuo Saito, Makoto Matsumoto, Hiroshima +University and The University of Tokyo. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. 
+* Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/dsfmt/calc-jump.cpp b/numpy/random/src/dsfmt/calc-jump.cpp new file mode 100644 index 000000000..495b2797c --- /dev/null +++ b/numpy/random/src/dsfmt/calc-jump.cpp @@ -0,0 +1,81 @@ +/** + * @file calc-jump.cpp + * + * @brief calc jump function. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. 
+ * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + * + * Compile: + * g++ calc-jump.cpp -o calc-jump -lntl + * + * Compute polynomial for 2^128 steps: + * ./calc-jump 340282366920938463463374607431768211456 poly.19937.txt + * + */ +#include <iostream> +#include <fstream> +#include <iomanip> +#include <sstream> +#include <string> +#include <inttypes.h> +#include <stdint.h> +#include <time.h> +#include <NTL/GF2X.h> +#include <NTL/vec_GF2.h> +#include <NTL/ZZ.h> +#include "dSFMT-calc-jump.hpp" + +using namespace NTL; +using namespace std; +using namespace dsfmt; + +static void read_file(GF2X& lcmpoly, long line_no, const string& file); +/** + * Command-line entry point: calc-jump jump-step poly-file. + * Prints a usage message and returns -1 when fewer than two arguments + * follow the program name. Otherwise parses jump-step into an NTL ZZ, + * loads the polynomial from record 0 of poly-file via read_file(), and + * prints the string produced by calc_jump(), returning 0. + */ +int main(int argc, char * argv[]) { + if (argc <= 2) { + cout << argv[0] << " jump-step poly-file" << endl; + cout << " jump-step: a number between zero and 2^{DSFMT_MEXP}-1.\n" + << " large decimal number is allowed." << endl; + cout << " poly-file: one of poly.{MEXP}.txt " + << "file" << endl; + return -1; + } + string step_string = argv[1]; + string filename = argv[2]; + long no = 0; + GF2X lcmpoly; + read_file(lcmpoly, no, filename); + ZZ step; + stringstream ss(step_string); + ss >> step; + string jump_str; + calc_jump(jump_str, step, lcmpoly); + cout << "jump polynomial:" << endl; + cout << jump_str << endl; + return 0; +} +/** + * Reads one polynomial from a text file into lcmpoly. + * Skips line_no records of two whitespace-delimited tokens each, then + * reads two more tokens and hands the second one to stringtopoly(). + * There is no explicit check that the file opened: if the stream is + * already in a failed state the final reads are skipped and + * stringtopoly() receives whatever line holds at that point. + * @param lcmpoly output polynomial + * @param line_no number of two-token records to skip first + * @param file path of the polynomial file + */ + +static void read_file(GF2X& lcmpoly, long line_no, const string& file) +{ + ifstream ifs(file.c_str()); + string line; + for (int i = 0; i < line_no; i++) { + ifs >> line; + ifs >> line; + } + if (ifs) { + ifs >> line; + line = ""; + ifs >> line; + } + stringtopoly(lcmpoly, line); +} diff --git a/numpy/random/src/dsfmt/dSFMT-benchmark.c b/numpy/random/src/dsfmt/dSFMT-benchmark.c new file mode 100644 index 000000000..af29d0e1f --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-benchmark.c @@ -0,0 +1,43 @@ +/* + * + * cl dsfmt-benchmark.c dSFMT.c /Ox -DHAVE_SSE2 + * + * gcc dSFMT-benchmark.c dSFMT.c -O3 -DHAVE_SSE2 -DDSFMT_MEXP=19937 -o + * dSFMT-benchmark + */ +#include 
<inttypes.h> +#include <time.h> + +#include "dSFMT.h" + + +#define N 1000000000 +/** + * Benchmark driver. + * Seeds a dsfmt_t with a fixed seed, then repeatedly fills buffer with + * DSFMT_N64 doubles and folds each consecutive pair into one 64-bit value + * built from bits 16..47 of each double's bit pattern (first double in the + * upper 32 bits, second in the lower 32 bits). The outer loop runs + * N / (DSFMT_N64 / 2) times, so roughly N combined values are summed. + * Prints the checksum, the count, and a randoms-per-second figure rounded + * down to a multiple of 1000000. + * NOTE(review): the doubles are read through uint64_t pointers (type + * punning via pointer cast) - confirm this is acceptable for the compilers + * in use. + * NOTE(review): printf is called but <stdio.h> is not included here; + * presumably it arrives through dSFMT.h - confirm. + */ +int main() { + int i, j; + uint32_t seed = 0xDEADBEAF; + uint64_t count = 0, sum = 0; + dsfmt_t state; + double buffer[DSFMT_N64]; + + uint64_t out; + uint64_t *tmp; + dsfmt_init_gen_rand(&state, seed); + clock_t begin = clock(); + for (i = 0; i < N / (DSFMT_N64 / 2); i++) { + dsfmt_fill_array_close_open(&state, &buffer[0], DSFMT_N64); + for (j = 0; j < DSFMT_N64; j += 2) { + tmp = (uint64_t *)&buffer[j]; + out = (*tmp >> 16) << 32; + tmp = (uint64_t *)&buffer[j + 1]; + out |= (*tmp >> 16) & 0xffffffff; + sum += out; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp b/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp new file mode 100644 index 000000000..b960826be --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp @@ -0,0 +1,106 @@ +#pragma once +#ifndef DSFMT_CALC_JUMP_HPP +#define DSFMT_CALC_JUMP_HPP +/** + * @file dSFMT-calc-jump.hpp + * + * @brief functions for calculating jump polynomial. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#include <iostream> +#include <iomanip> +#include <sstream> +#include <NTL/GF2X.h> + +namespace dsfmt { +/** + * converts polynomial to string for convenient use in C language. 
+ * @param x output string + * @param polynomial input polynomial + * + * Each output character is one hex digit covering four consecutive + * coefficients starting at degree 0; bit j of the digit written for + * position i is the coefficient of degree i + j. + */ + static inline void polytostring(std::string& x, NTL::GF2X& polynomial) + { + using namespace NTL; + using namespace std; + + long degree = deg(polynomial); + int buff; + stringstream ss; + for (int i = 0; i <= degree; i+=4) { + buff = 0; + for (int j = 0; j < 4; j++) { + if (IsOne(coeff(polynomial, i + j))) { + buff |= 1 << j; + } else { + buff &= (0x0f ^ (1 << j)); + } + } + ss << hex << buff; + } + ss << flush; + x = ss.str(); + } + +/** + * converts string to polynomial + * @param str string of hex digits, four coefficients per character with + * bit 0 of the first character being the degree-0 coefficient + * @param poly output polynomial (cleared first) + * + * Only '0'-'9' and lowercase 'a'-'f' decode to their hex value: any + * character below 'a' is mapped as c - '0', so uppercase digits and + * non-hex characters are not rejected and yield unintended bits. + */ + static inline void stringtopoly(NTL::GF2X& poly, std::string& str) + { + using namespace NTL; + using namespace std; + + stringstream ss(str); + char c; + long p = 0; + clear(poly); + while(ss) { + ss >> c; + if (!ss) { + break; + } + if (c >= 'a') { + c = c - 'a' + 10; + } else { + c = c - '0'; + } + for (int j = 0; j < 4; j++) { + if (c & (1 << j)) { + SetCoeff(poly, p, 1); + } else { + SetCoeff(poly, p, 0); + } + p++; + } + } + } + +/** + * calculate the jump polynomial. + * SFMT generates 4 32-bit integers from one internal state. + * The result of PowerXMod(jump, step, characteristic) is encoded with + * polytostring(). + * @param jump_str output string which represents jump polynomial. + * @param step jump step of internal state + * @param characteristic characteristic polynomial, passed to PowerXMod + * as its last argument + */ + static inline void calc_jump(std::string& jump_str, + NTL::ZZ& step, + NTL::GF2X& characteristic) + { + using namespace NTL; + using namespace std; + GF2X jump; + PowerXMod(jump, step, characteristic); + polytostring(jump_str, jump); + } +} +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-common.h b/numpy/random/src/dsfmt/dSFMT-common.h new file mode 100644 index 000000000..30c26c08b --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-common.h @@ -0,0 +1,115 @@ +#pragma once +/** + * @file dSFMT-common.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator with jump function. 
This file includes common functions + * used in random number generation and jump. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#ifndef DSFMT_COMMON_H +#define DSFMT_COMMON_H + +#include "dSFMT.h" + +#if defined(HAVE_SSE2) +# include <emmintrin.h> +union X128I_T { + uint64_t u[2]; + __m128i i128; +}; +union X128D_T { + double d[2]; + __m128d d128; +}; +/** mask data for sse2: u[0] is DSFMT_MSK1 and u[1] is DSFMT_MSK2 */ +static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; +#endif +/** + * AltiVec version of the recursion formula (selected by HAVE_ALTIVEC). + * @param r output 128-bit + * @param a a 128-bit part of the internal state array (read) + * @param b a 128-bit part of the internal state array (read) + * @param lung 128-bit word that is read at the start and overwritten + * with the newly computed value at the end (I/O) + * a is loaded into a local before r is stored. + */ +#if defined(HAVE_ALTIVEC) +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + const vector unsigned char sl1 = ALTI_SL1; + const vector unsigned char sl1_perm = ALTI_SL1_PERM; + const vector unsigned int sl1_msk = ALTI_SL1_MSK; + const vector unsigned char sr1 = ALTI_SR; + const vector unsigned char sr1_perm = ALTI_SR_PERM; + const vector unsigned int sr1_msk = ALTI_SR_MSK; + const vector unsigned char perm = ALTI_PERM; + const vector unsigned int msk1 = ALTI_MSK; + vector unsigned int w, x, y, z; + + z = a->s; + w = lung->s; + x = vec_perm(w, (vector unsigned int)perm, perm); + y = vec_perm(z, (vector unsigned int)sl1_perm, sl1_perm); + y = vec_sll(y, sl1); + y = vec_and(y, sl1_msk); + w = vec_xor(x, b->s); + w = vec_xor(w, y); + x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); + x = vec_srl(x, sr1); + x = vec_and(x, sr1_msk); + y = vec_and(w, msk1); + z = vec_xor(z, y); + r->s = vec_xor(z, x); + lung->s = w; +} +#elif defined(HAVE_SSE2) +/** + * This function represents the recursion formula. 
+ * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param u a 128-bit part of the internal state array (I/O) + * a is loaded before r is stored, and u is overwritten last. + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { + __m128i v, w, x, y, z; + + x = a->si; + z = _mm_slli_epi64(x, DSFMT_SL1); + y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); + z = _mm_xor_si128(z, b->si); + y = _mm_xor_si128(y, z); + + v = _mm_srli_epi64(y, DSFMT_SR); + w = _mm_and_si128(y, sse2_param_mask.i128); + v = _mm_xor_si128(v, x); + v = _mm_xor_si128(v, w); + r->si = v; + u->si = y; +} +#else +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param lung a 128-bit part of the internal state array (I/O) + * Both halves of a and lung are copied into locals before lung and r + * are written, so r may point at the same word as a. + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + uint64_t t0, t1, L0, L1; + + t0 = a->u[0]; + t1 = a->u[1]; + L0 = lung->u[0]; + L1 = lung->u[1]; + lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; + lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; + r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; + r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; +} +#endif +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-jump.c b/numpy/random/src/dsfmt/dSFMT-jump.c new file mode 100644 index 000000000..1832bb885 --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-jump.c @@ -0,0 +1,184 @@ +/** + * @file dSFMT-jump.c + * + * @brief do jump using jump polynomial. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. 
+ * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ + +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include "dSFMT-params.h" +#include "dSFMT.h" +#include "dSFMT-jump.h" +#include "dSFMT-common.h" + +#if defined(__cplusplus) +extern "C" { +#endif + /** + * One row of correction constants for a given Mersenne exponent. + * add_fix() XORs fix[0] and fix[1] into u[0] and u[1] of every state + * word below index DSFMT_N, and fix[2] and fix[3] into the word at + * index DSFMT_N. + */ + struct FIX_T { + int mexp; + uint64_t fix[4]; + }; + /** + * Correction constants looked up by add_fix() using DSFMT_MEXP. + * Rows are in increasing mexp order (add_fix() stops scanning at the + * first row whose mexp exceeds DSFMT_MEXP) and the table ends with a + * row whose mexp is 0. + * NOTE(review): the table is neither static nor const, so the symbol + * has external linkage - confirm that is intended. + */ + struct FIX_T fix_table[] = { + {521, {UINT64_C(0x3fff56977f035125), + UINT64_C(0x3ff553857b015035), + UINT64_C(0x4034434434434434), + UINT64_C(0x0140151151351371)}}, + {1279, {UINT64_C(0x3ff87befce70e89f), + UINT64_C(0x3ff5f6afa3c60868), + UINT64_C(0xa4ca4caccaccacdb), + UINT64_C(0x40444444444c44c4)}}, + {4253, {UINT64_C(0x3ff85a66da51a81a), + UINT64_C(0x3ff4f4aeab9688eb), + UINT64_C(0x20524524534d34d3), + UINT64_C(0xc9cc9cc9cc9ccdcf)}}, + {216091, {UINT64_C(0x3ff096d54a871071), + UINT64_C(0x3ffafa9bfbd5d55d), + UINT64_C(0x0470470470573573), + UINT64_C(0x0250250251259259)}}, + {0} + }; + + inline static void next_state(dsfmt_t * dsfmt); + +#if defined(HAVE_SSE2) +/** + * add internal state of src to dest as F2-vector. 
+ * @param dest destination state + * @param src source state + * + * The two states may sit at different idx positions: dest->status[i] is + * XORed with src->status[(i + diff) % DSFMT_N], where diff is the word + * offset between src->idx / 2 and dest->idx / 2. The extra word at index + * DSFMT_N is XORed directly. + */ + inline static void add(dsfmt_t *dest, dsfmt_t *src) { + int dp = dest->idx / 2; + int sp = src->idx / 2; + int diff = (sp - dp + DSFMT_N) % DSFMT_N; + int p; + int i; + for (i = 0; i < DSFMT_N - diff; i++) { + p = i + diff; + dest->status[i].si + = _mm_xor_si128(dest->status[i].si, src->status[p].si); + } + for (i = DSFMT_N - diff; i < DSFMT_N; i++) { + p = i + diff - DSFMT_N; + dest->status[i].si + = _mm_xor_si128(dest->status[i].si, src->status[p].si); + } + dest->status[DSFMT_N].si + = _mm_xor_si128(dest->status[DSFMT_N].si, + src->status[DSFMT_N].si); + } +#else + /** + * Portable version of add(): XORs src into dest 64 bits at a time. + * dest->status[i] is combined with src->status[(i + diff) % DSFMT_N], + * where diff is the word offset between src->idx / 2 and dest->idx / 2; + * the extra word at index DSFMT_N is combined directly. + * @param dest destination state + * @param src source state + */ inline static void add(dsfmt_t *dest, dsfmt_t *src) { + int dp = dest->idx / 2; + int sp = src->idx / 2; + int diff = (sp - dp + DSFMT_N) % DSFMT_N; + int p; + int i; + for (i = 0; i < DSFMT_N - diff; i++) { + p = i + diff; + dest->status[i].u[0] ^= src->status[p].u[0]; + dest->status[i].u[1] ^= src->status[p].u[1]; + } + for (; i < DSFMT_N; i++) { + p = i + diff - DSFMT_N; + dest->status[i].u[0] ^= src->status[p].u[0]; + dest->status[i].u[1] ^= src->status[p].u[1]; + } + dest->status[DSFMT_N].u[0] ^= src->status[DSFMT_N].u[0]; + dest->status[DSFMT_N].u[1] ^= src->status[DSFMT_N].u[1]; + } +#endif + +/** + * calculate next state + * Runs one do_recursion() step in place on status[(idx / 2) % DSFMT_N], + * using status[DSFMT_N] as the lung word, then advances idx by 2 modulo + * DSFMT_N64. + * @param dsfmt dSFMT internal state + */ + inline static void next_state(dsfmt_t * dsfmt) { + int idx = (dsfmt->idx / 2) % DSFMT_N; + w128_t * lung; + w128_t * pstate = &dsfmt->status[0]; + + lung = &pstate[DSFMT_N]; + do_recursion(&pstate[idx], + &pstate[idx], + &pstate[(idx + DSFMT_POS1) % DSFMT_N], + lung); + dsfmt->idx = (dsfmt->idx + 2) % DSFMT_N64; + } + /** + * XORs the fix_table constants for DSFMT_MEXP into the state. + * Does nothing when fix_table has no row whose mexp equals DSFMT_MEXP. + * Applying it twice restores the original words, since XOR with the + * same constants cancels. + * @param dsfmt dSFMT internal state (modified in place) + */ + inline static void add_fix(dsfmt_t * dsfmt) { + int i; + int index = -1; + for (i = 0; fix_table[i].mexp != 0; i++) { + if (fix_table[i].mexp == DSFMT_MEXP) { + index = i; + } + if (fix_table[i].mexp > DSFMT_MEXP) { + break; + } + } + if (index < 0) { + return; + } + for (i = 0; i < DSFMT_N; i++) { + dsfmt->status[i].u[0] ^= fix_table[index].fix[0]; 
+ dsfmt->status[i].u[1] ^= fix_table[index].fix[1]; + } + dsfmt->status[DSFMT_N].u[0] ^= fix_table[index].fix[2]; + dsfmt->status[DSFMT_N].u[1] ^= fix_table[index].fix[3]; + } + +/** + * jump ahead using jump_string + * @param dsfmt dSFMT internal state input and output. + * @param jump_string string which represents jump polynomial. + * + * jump_string is read up to its terminating NUL, one hex digit (either + * case) per character, and each digit is consumed least-significant bit + * first. For every bit the current state is XOR-added into a zeroed work + * state when the bit is set, and the state is then advanced one step. + * The work state finally replaces *dsfmt. add_fix() is applied before + * the loop and again to the result, and the idx held on entry is + * restored on exit. + * Non-hex characters are only caught by assert(), so they pass + * unchecked when assertions are compiled out. + * NOTE(review): jump_string[i] is a plain char handed to isxdigit() and + * tolower(); a negative value would be outside the domain of those + * functions - confirm inputs are always ASCII. + */ + void dSFMT_jump(dsfmt_t * dsfmt, const char * jump_string) { + dsfmt_t work; + int index = dsfmt->idx; + int bits; + int i; + int j; + memset(&work, 0, sizeof(dsfmt_t)); + add_fix(dsfmt); + dsfmt->idx = DSFMT_N64; + + for (i = 0; jump_string[i] != '\0'; i++) { + bits = jump_string[i]; + assert(isxdigit(bits)); + bits = tolower(bits); + if (bits >= 'a' && bits <= 'f') { + bits = bits - 'a' + 10; + } else { + bits = bits - '0'; + } + bits = bits & 0x0f; + for (j = 0; j < 4; j++) { + if ((bits & 1) != 0) { + add(&work, dsfmt); + } + next_state(dsfmt); + bits = bits >> 1; + } + } + *dsfmt = work; + add_fix(dsfmt); + dsfmt->idx = index; + } + +#if defined(__cplusplus) +} +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-jump.h b/numpy/random/src/dsfmt/dSFMT-jump.h new file mode 100644 index 000000000..689f9499a --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-jump.h @@ -0,0 +1,29 @@ +#pragma once +#ifndef DSFMT_JUMP_H +#define DSFMT_JUMP_H +/** + * @file dSFMT-jump.h + * + * @brief jump header file. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. 
+ * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#if defined(__cplusplus) +extern "C" { +#endif + +#include "dSFMT.h" +void dSFMT_jump(dsfmt_t *dsfmt, const char *jump_str); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-params.h b/numpy/random/src/dsfmt/dSFMT-params.h new file mode 100644 index 000000000..aa0247800 --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-params.h @@ -0,0 +1,87 @@ +#ifndef DSFMT_PARAMS_H +#define DSFMT_PARAMS_H + +#include "dSFMT.h" + +/*---------------------- + the parameters of DSFMT + following definitions are in dSFMT-paramsXXXX.h file. + ----------------------*/ +/** the pick up position of the array. +#define DSFMT_POS1 122 +*/ + +/** the parameter of shift left as four 32-bit registers. +#define DSFMT_SL1 18 + */ + +/** the parameter of shift right as four 32-bit registers. +#define DSFMT_SR1 12 +*/ + +/** A bitmask, used in the recursion. These parameters are introduced + * to break symmetry of SIMD. +#define DSFMT_MSK1 (uint64_t)0xdfffffefULL +#define DSFMT_MSK2 (uint64_t)0xddfecb7fULL +*/ + +/** These definitions are part of a 128-bit period certification vector. 
+#define DSFMT_PCV1 UINT64_C(0x00000001) +#define DSFMT_PCV2 UINT64_C(0x00000000) +*/ + +#define DSFMT_LOW_MASK UINT64_C(0x000FFFFFFFFFFFFF) +#define DSFMT_HIGH_CONST UINT64_C(0x3FF0000000000000) +#define DSFMT_SR 12 + +/* for sse2 */ +#if defined(HAVE_SSE2) + #define SSE2_SHUFF 0x1b +#elif defined(HAVE_ALTIVEC) + #if defined(__APPLE__) /* For OSX */ + #define ALTI_SR (vector unsigned char)(4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4) + #define ALTI_SR_PERM \ + (vector unsigned char)(15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14) + #define ALTI_SR_MSK \ + (vector unsigned int)(0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU) + #define ALTI_PERM \ + (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3) + #else + #define ALTI_SR {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4} + #define ALTI_SR_PERM {15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14} + #define ALTI_SR_MSK {0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU} + #define ALTI_PERM {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3} + #endif +#endif + +#if DSFMT_MEXP == 521 + #include "dSFMT-params521.h" +#elif DSFMT_MEXP == 1279 + #include "dSFMT-params1279.h" +#elif DSFMT_MEXP == 2203 + #include "dSFMT-params2203.h" +#elif DSFMT_MEXP == 4253 + #include "dSFMT-params4253.h" +#elif DSFMT_MEXP == 11213 + #include "dSFMT-params11213.h" +#elif DSFMT_MEXP == 19937 + #include "dSFMT-params19937.h" +#elif DSFMT_MEXP == 44497 + #include "dSFMT-params44497.h" +#elif DSFMT_MEXP == 86243 + #include "dSFMT-params86243.h" +#elif DSFMT_MEXP == 132049 + #include "dSFMT-params132049.h" +#elif DSFMT_MEXP == 216091 + #include "dSFMT-params216091.h" +#else +#ifdef __GNUC__ + #error "DSFMT_MEXP is not valid." 
+ #undef DSFMT_MEXP +#else + #undef DSFMT_MEXP +#endif + +#endif + +#endif /* DSFMT_PARAMS_H */ diff --git a/numpy/random/src/dsfmt/dSFMT-params19937.h b/numpy/random/src/dsfmt/dSFMT-params19937.h new file mode 100644 index 000000000..a600b0dbc --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-params19937.h @@ -0,0 +1,40 @@ +#ifndef DSFMT_PARAMS19937_H +#define DSFMT_PARAMS19937_H + +/* #define DSFMT_N 191 */ +/* #define DSFMT_MAXDEGREE 19992 */ +#define DSFMT_POS1 117 +#define DSFMT_SL1 19 +#define DSFMT_MSK1 UINT64_C(0x000ffafffffffb3f) +#define DSFMT_MSK2 UINT64_C(0x000ffdfffc90fffd) +#define DSFMT_MSK32_1 0x000ffaffU +#define DSFMT_MSK32_2 0xfffffb3fU +#define DSFMT_MSK32_3 0x000ffdffU +#define DSFMT_MSK32_4 0xfc90fffdU +#define DSFMT_FIX1 UINT64_C(0x90014964b32f4329) +#define DSFMT_FIX2 UINT64_C(0x3b8d12ac548a7c7a) +#define DSFMT_PCV1 UINT64_C(0x3d84e1ac0dc82880) +#define DSFMT_PCV2 UINT64_C(0x0000000000000001) +#define DSFMT_IDSTR "dSFMT2-19937:117-19:ffafffffffb3f-ffdfffc90fffd" + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned char)(3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3) + #define ALTI_SL1_PERM \ + (vector unsigned char)(2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1) + #define ALTI_SL1_MSK \ + (vector unsigned int)(0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U) + #define ALTI_MSK (vector unsigned int)(DSFMT_MSK32_1, \ + DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4) +#else /* For OTHER OSs(Linux?) 
*/ + #define ALTI_SL1 {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3} + #define ALTI_SL1_PERM \ + {2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1} + #define ALTI_SL1_MSK \ + {0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U} + #define ALTI_MSK \ + {DSFMT_MSK32_1, DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4} +#endif + +#endif /* DSFMT_PARAMS19937_H */ diff --git a/numpy/random/src/dsfmt/dSFMT-poly.h b/numpy/random/src/dsfmt/dSFMT-poly.h new file mode 100644 index 000000000..f8e15c3eb --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-poly.h @@ -0,0 +1,53 @@ +static const char * poly_128 = +"f4dfa6c62049d0776e0bf6f1e953f3aa38abb113df86be024eab3773ad5f2b82ead936022e656dff7e562691c59dd5f7d2" +"566b78d9669002503c4ddb1888a49f32333f515e6c60c4ecd221078ec6f26f0a90f4875067ca1f399a99775037adf90556" +"6e2c7e6b42131420f8f04f112c92621c9b1502f2a8aefad6c667904af62f0d55e02d396902d3b89450103c5ce5fe0408d9" +"7cbb864861b49e4e42048ff3310b48faac55095a7f422eea4aade752f947f947c6be0a0c665bdea099246ab9eff658ea8c" +"a468bf49d0227748367878de06d7bd86ea6708fcac6e252f5f00f04309b2aac3036b64afb39d990427c6c9f03477cc7e93" +"5c43c0e61bc161db8eb15516eee8cb377ecbc1849207990fb6778721b29bfe0d89bfda1b3772fa5b0b1f7ec3daf3605203" +"2285898c6f6396f55010c31f8201b7e2e51d94f920bfe57684c5415cc342cb39a0045d9793d13cf8646096daeb8bb9bfc2" +"0a90de8f2426da8733267a9b9674f32154e8f84a9932223a2ca3c787d0b66df6675febbdfcba2f9cef09c621c57e11098b" +"3289c77397aaae8b104642ffe0c4b75598efbc53745984d68b4d6656cae299ae2be55217a9a02b009ca7be32f47fbe434b" +"ce4914a34d0c9b0085bede9b8a99319c34660d66f0124b5a7714c4bf3cbfec3ee43ed817087168bad80133bebaeeb68cf7" +"929a24d1bb3de831a8340d220906ab04159cf94b21d5ee813bd7c80f10f01b43052af530917513b169254c25d6fcfe6cb4" +"20d6ce92f54886ef6eaf9a5ba35e893ff593834d05ddf28899e42d729c7df3d21ef036020789739366f0c11ec52ff92a0b" +"fd8ba69508e27b20fabb8217bd36b90e5aa918159ac87913bc7b46c04e366c23c92807fbe9c6a407e6a4db0b4fc23c3b6c" +"706b5ca058fe8c190f849f18d16d6b48b5ed760eb202fd566291a799420b9654e08b8118bcbfead8e9dd2fdb9b053e9bdf" 
+"b665285c78718f726d0b3d6c37e116428ec9ac9db2637259e4e8d6402bbada46c6bdb03985e19a82e9b4e57de1b025a3cb" +"1f850beae7e8da9941655825bce0e89d536b6ee9064865b1a85c185e9fc9cb7f435de13d44773c00eed442a286e4ab807e" +"3cab4dc3441d1b7d2af693812ae8b39652bb8c835fc895d13d6da93541afeadeee450475c29f3b2dfa8ef1c1e2547463b2" +"cc2f0ff7a42ac4dd35e25c4fa030d2d2766fbe9f2d04c1304671747bace2f7dd55142bfa60f8cbc968bfc3d7a342152dc6" +"84a0fb5a32c0962a62b5220ac0f72add9d8b84d6cc76b97d03245e01fc8da3414a49bb4075d3488f29b56dc42ba69e3b58" +"529448c943ecfd98b3784a39d0b8609a8fb945e757f4569f53bd2cf80f7f638acf5b67fe9c560a3b7b0cf7e0398f31aa8b" +"03cf9c62b24296b6d8596b694469a02686c38daa16a1ef86e012d61a2f7de1693a5c00b3685175caec3c67146477eba548" +"30f1d546cb18a553779aa46adb4f2010e33f3def847c7d89b51a8462b227605f6c920fd558a6daf64bc98682e508ae960c" +"0c571870e603ba1fce0c13d53176f353fd319959e13db93eae1359f06e3dd4767c04f824cf34ec7bf8f60161ba1a615db8" +"2852eca9e3869afa711ab9a090660b0dc6cfbea310dda77e02310fbaeacd2636f975838c2dbcdbe9ac2cd85cee28f5e3f0" +"c73abf62f9fa02cd79a7606b7ba855db68a07848b057c3aaf38f1a70086e14616f6f88305a1f9ce6b41378a620d4db3e0e" +"7e1d421590dccaeff86212e232eeb5eb8a8d33a8c9b25ae88f3a7bd5032b4efa68f8af3186a02ffcbf5456f12beccace94" +"c81c360cc4a0dcc642b59f991eec68c59af78139ca60b96d6a18e9535f8995e89bd2cf6a0aef3acffd33d1c0c1b79b6641" +"4a91d9f65b2b4ec65844b96f725d2b4b0c309f3eb9d714e9dd939bbdfd85ce8fb43679aeab13f6c29549949503c9466dbd" +"337c4cdde46d6eacd15f21f4d8fdeaa627a47884c88a9c85f0b731d271a8ea7cb9e04a4a149c23c10f56b3a0476dc77a99" +"9d6e4f813e4b0f805e2a693e2ae4ae0ecc423c9ba5d17b42e691abf83784a582f2b1fd85d1e0a27ba38a500963568b2450" +"363d2c5e3f7b8ba3e5b56e4e9f745a3a710bf2ae233c303068c532ce78ff031e6ab28b705dd94d7db4500909edb5626b8c" +"9bd5ff4f0b4741388f0b91563ee516934c013e901572cba005ac5c535f4f107903be9af7b2793dfb61b5070facbe71eefe" +"1b5600f975c8c38c3a2350d78beadfecb78e981164ae8bc866e732972d3ceef4aac68e15861f9b881d9b51b4edece150bc" 
+"124b07645defb4202ef5d0e0962db98cae6ed459561c93c74c20bd64362e4f4fffc389a6cd80514604ff22eecc10c9cbc7" +"981d19a8102b24146354c463107c9dc070e29e70df3578022acf72289ef071ab9f9402a544d0399f1b1e5f206b6d46d445" +"f6d612a490e72918e00c853eda8493bef511149e80c9ab56e8b4b8cba3987249f77d060e61760e5792ac321c987c03c260" +"6e9393a7970212992cdbd16448078d5039d4c2c3199714f53278f4f7b1d2e514cf95bdfc078b8bb0db659cb2c3f5cc0289" +"0ea84f05d414c88d2db9e9f8455659b9fa6254405317245fa070d6970cafb4dadb2522b490a5c8e02fe973a8cdbfbfbdbf" +"b01535099ffba3d3896bc4d1189fc570c3e6fdc6469265b8da912772e75dd62ab71be507f700d56cac5e68fd6b57ec1661" +"68ab5258a69625c142a5b1b3519f94be1bde5e51d3bd8ea0c12d5af2fe4615b1b7bd4a96628a4fabc65925ff09718f63bb" +"ebaad98f89bd9543a27b3ff3b5d8bfa89f941a5eb8cc005ccd4a705190e1c9dc6a9f4264e5ee658520a4438e92de854bff" +"c39f8dc7dfbb5de4f14ba63ea16a37d14a7b4610f95b6cffd55e4679b29cedbdf20e7bd16da822fad910c359ee3a68e48a" +"ae6e769b0e291d5d3aa3e2ca9d8d23abe8a1d5349f4991e9300852cc0befb20c2fc0d169306b260763344024f8092cbcc2" +"4c6807363e9fc548a30d5faab3a94b2af0782a2942be80c45d8b0587efd587394ef33c33022436e285806ddffdd32fe363" +"45c3c38ed8d680abeb7a028b44ee6f94d060a14c7019bb6af1f1b5f0a562957d19826d8cc216f9b908c989ccd5415e3525" +"dfe9422ffb5b50b7cc3083dc325544751e5683535d7439d3da2b0bb73bea551dd99e04e0e793804f4774eb6b1daf781d9c" +"aa5128274e599e847862fe309027813d3e4eda0bbeb7201856a5c5d8370e44dabff0bb229c723ba0a6bcf29c44536147de" +"11b7835991018100105bd4329217f7386903fe8e7363cd7b3e893244e245e0a187467664c05b0be1fd429722b9b9a5e319" +"8147fad72776e8a63aab9054fa9d259af0198d088d71d132e6068676a8e9ebb0f616b51ee34aac39c2c2221c7112401727" +"0d75ff4a048363c389e04e9b440ad2032a381ac2cfc54f409caa791e65ee4f5d6cd035008f219b88a803a7382ae447bf65" +"a3df2176b25b3b7b67dabe34decd9a1384dc7a003916ca8fbcb29b3ad6fd8eac5bbbaa3bdfa6c6a3ad9427c4f3ed79fea2" +"6e14c8ce5fa3b4f82c5f7b6d2125916753a7b92ce9b46d45";
\ No newline at end of file diff --git a/numpy/random/src/dsfmt/dSFMT-test-gen.c b/numpy/random/src/dsfmt/dSFMT-test-gen.c new file mode 100644 index 000000000..697a3010a --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-test-gen.c @@ -0,0 +1,58 @@ +/* + * cl dSFMT-test-gen.c dSFMT.c -DHAVE_SSE2 -DDSFMT_MEXP=19937 /Ox + * + * gcc dSFMT-test-gen.c dSFMT.c -DHAVE_SSE2 -DDSFMT_MEXP=19937 -o dSFMT + */ + +#include <inttypes.h> +#include <stdio.h> + +#include "dSFMT.h" + + +int main(void) { + int i; + double d; + uint64_t *temp; + uint32_t seed = 1UL; + dsfmt_t state; + dsfmt_init_gen_rand(&state, seed); + double out[1000]; + dsfmt_fill_array_close1_open2(&state, out, 1000); + + FILE *fp; + fp = fopen("dSFMT-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, %" PRIu32 "\n", seed); + for (i = 0; i < 1000; i++) { + d = out[i]; + temp = (uint64_t *)&d; + fprintf(fp, "%d, %" PRIu64 "\n", i, *temp); + if (i==999) { + printf("%d, %" PRIu64 "\n", i, *temp); + } + } + fclose(fp); + + seed = 123456789UL; + dsfmt_init_gen_rand(&state, seed); + dsfmt_fill_array_close1_open2(&state, out, 1000); + fp = fopen("dSFMT-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, %" PRIu32 "\n", seed); + for (i = 0; i < 1000; i++) { + d = out[i]; + temp = (uint64_t *)&d; + fprintf(fp, "%d, %" PRIu64 "\n", i, *temp); + if (i==999) { + printf("%d, %" PRIu64 "\n", i, *temp); + } + } + fclose(fp); +} diff --git a/numpy/random/src/dsfmt/dSFMT.c b/numpy/random/src/dsfmt/dSFMT.c new file mode 100644 index 000000000..0f122c26c --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT.c @@ -0,0 +1,626 @@ +/** + * @file dSFMT.c + * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT) + * based on IEEE 754 format. 
+ *
+ * @author Mutsuo Saito (Hiroshima University)
+ * @author Makoto Matsumoto (Hiroshima University)
+ *
+ * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima
+ * University. All rights reserved.
+ *
+ * The new BSD License is applied to this software, see LICENSE.txt
+ */
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+
+#include "dSFMT-params.h"
+
+#include "dSFMT-common.h"
+#include "dSFMT-jump.h"
+#include "dSFMT-poly.h"
+
+#if defined(__cplusplus)
+extern "C" {
+#endif
+
+/** dsfmt internal state vector (file-scope object with external linkage) */
+dsfmt_t dsfmt_global_data;
+/** Mersenne exponent this file was compiled with; the dsfmt_chk_init_*
+ * functions compare it against the value passed by the caller */
+static const int dsfmt_mexp = DSFMT_MEXP;
+
+/*----------------
+  STATIC FUNCTIONS
+  (forward declarations; the definitions follow further down)
+  ----------------*/
+inline static uint32_t ini_func1(uint32_t x);
+inline static uint32_t ini_func2(uint32_t x);
+inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size);
+inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size);
+inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size);
+inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size);
+inline static int idxof(int i);
+static void initial_mask(dsfmt_t *dsfmt);
+static void period_certification(dsfmt_t *dsfmt);
+
+#if defined(HAVE_SSE2)
+/** 1 in both 64-bit lanes; OR-ed into the value by convert_o0o1 */
+static const union X128I_T sse2_int_one = {{1, 1}};
+/** 2.0 in both double lanes; minuend used by convert_o0c1 */
+static const union X128D_T sse2_double_two = {{2.0, 2.0}};
+/** -1.0 in both double lanes; added by convert_c0o1 and convert_o0o1 */
+static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}};
+#endif
+
+/**
+ * This function simulate a 32-bit array index overlapped to 64-bit
+ * array of LITTLE ENDIAN in BIG ENDIAN machine.
+ */
+#if defined(DSFMT_BIG_ENDIAN)
+inline static int idxof(int i) { return i ^ 1; } /* flips the lowest index bit, so 0 and 1 swap, 2 and 3 swap, ... */
+#else
+inline static int idxof(int i) { return i; } /* identity when DSFMT_BIG_ENDIAN is not defined */
+#endif
+
+#if defined(HAVE_SSE2)
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range [0, 1).
+ * Both lanes are shifted with a single packed add of -1.0.
+ * @param w 128bit structure of double precision floating point numbers (I/O)
+ */
+inline static void convert_c0o1(w128_t *w) {
+  w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128);
+}
+
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range (0, 1].
+ * Each lane x is replaced by 2.0 - x with a single packed subtract.
+ * @param w 128bit structure of double precision floating point numbers (I/O)
+ */
+inline static void convert_o0c1(w128_t *w) {
+  w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd);
+}
+
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range (0, 1).
+ * The lowest bit of each 64-bit lane is set first, so no lane can hold
+ * exactly 1.0 when 1.0 is subtracted afterwards.
+ * @param w 128bit structure of double precision floating point numbers (I/O)
+ */
+inline static void convert_o0o1(w128_t *w) {
+  w->si = _mm_or_si128(w->si, sse2_int_one.i128);
+  w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128);
+}
+#else /* standard C and altivec */
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range [0, 1).
+ * Scalar variant: 1.0 is subtracted from each of the two doubles.
+ * @param w 128bit structure of double precision floating point numbers (I/O)
+ */
+inline static void convert_c0o1(w128_t *w) {
+  w->d[0] -= 1.0;
+  w->d[1] -= 1.0;
+}
+
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range (0, 1].
+ * @param w 128bit stracture of double precision floating point numbers (I/O)
+ */
+inline static void convert_o0c1(w128_t *w) {
+  w->d[0] = 2.0 - w->d[0]; /* maps x in [1, 2) to 2 - x in (0, 1] */
+  w->d[1] = 2.0 - w->d[1];
+}
+
+/**
+ * This function converts the double precision floating point numbers which
+ * distribute uniformly in the range [1, 2) to those which distribute uniformly
+ * in the range (0, 1).
+ * The lowest bit of each 64-bit word is set first, so neither double can
+ * be exactly 1.0 when 1.0 is subtracted afterwards.
+ * @param w 128bit structure of double precision floating point numbers (I/O)
+ */
+inline static void convert_o0o1(w128_t *w) {
+  w->u[0] |= 1;
+  w->u[1] |= 1;
+  w->d[0] -= 1.0;
+  w->d[1] -= 1.0;
+}
+#endif
+
+/**
+ * This function fills the user-specified array with double precision
+ * floating point pseudorandom numbers of the IEEE 754 format.
+ * Values are left in the generator's native [1, 2) form. On return the
+ * last DSFMT_N generated words have been copied into dsfmt->status, so
+ * the state continues from the end of the array.
+ * @param dsfmt dsfmt state vector.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param size number of 128-bit pseudorandom numbers to be generated.
+ */
+inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array,
+                                       int size) {
+  int i, j;
+  w128_t lung;
+
+  lung = dsfmt->status[DSFMT_N]; /* work on a local copy of the extra state word */
+  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung);
+  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { /* both inputs still come from the saved state */
+    do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1],
+                 &lung);
+  }
+  for (; i < DSFMT_N; i++) { /* the POS1-offset input now comes from words already written to array */
+    do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N],
+                 &lung);
+  }
+  for (; i < size - DSFMT_N; i++) { /* both inputs are earlier array words */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+  }
+  for (j = 0; j < 2 * DSFMT_N - size; j++) { /* runs only when size is below 2 * DSFMT_N: save words already generated */
+    dsfmt->status[j] = array[j + size - DSFMT_N];
+  }
+  for (; i < size; i++, j++) { /* generate the rest and save each new word as state */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    dsfmt->status[j] = array[i];
+  }
+  dsfmt->status[DSFMT_N] = lung; /* write the extra word back */
+}
+
+/**
+ * This function fills the user-specified array with double precision
+ * floating point pseudorandom numbers of the IEEE 754 format.
+ * @param dsfmt dsfmt state vector.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param size number of 128-bit pseudorandom numbers to be generated.
+ */
+inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array,
+                                       int size) {
+  int i, j;
+  w128_t lung;
+
+  lung = dsfmt->status[DSFMT_N]; /* work on a local copy of the extra state word */
+  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung);
+  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { /* both inputs still come from the saved state */
+    do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1],
+                 &lung);
+  }
+  for (; i < DSFMT_N; i++) { /* the POS1-offset input now comes from words already written to array */
+    do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N],
+                 &lung);
+  }
+  for (; i < size - DSFMT_N; i++) { /* both inputs are earlier array words */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    convert_c0o1(&array[i - DSFMT_N]); /* this word was just used as input for the last time, so it can become [0, 1) */
+  }
+  for (j = 0; j < 2 * DSFMT_N - size; j++) { /* runs only when size is below 2 * DSFMT_N: save unconverted words */
+    dsfmt->status[j] = array[j + size - DSFMT_N];
+  }
+  for (; i < size; i++, j++) { /* generate the rest; the state copy is taken before any conversion */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    dsfmt->status[j] = array[i];
+    convert_c0o1(&array[i - DSFMT_N]);
+  }
+  for (i = size - DSFMT_N; i < size; i++) { /* the last DSFMT_N words were never converted above */
+    convert_c0o1(&array[i]);
+  }
+  dsfmt->status[DSFMT_N] = lung; /* write the extra word back */
+}
+
+/**
+ * This function fills the user-specified array with double precision
+ * floating point pseudorandom numbers of the IEEE 754 format.
+ * @param dsfmt dsfmt state vector.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param size number of 128-bit pseudorandom numbers to be generated.
+ */
+inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array,
+                                       int size) {
+  int i, j;
+  w128_t lung;
+
+  lung = dsfmt->status[DSFMT_N]; /* work on a local copy of the extra state word */
+  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung);
+  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { /* both inputs still come from the saved state */
+    do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1],
+                 &lung);
+  }
+  for (; i < DSFMT_N; i++) { /* the POS1-offset input now comes from words already written to array */
+    do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N],
+                 &lung);
+  }
+  for (; i < size - DSFMT_N; i++) { /* both inputs are earlier array words */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    convert_o0o1(&array[i - DSFMT_N]); /* this word was just used as input for the last time, so it can become (0, 1) */
+  }
+  for (j = 0; j < 2 * DSFMT_N - size; j++) { /* runs only when size is below 2 * DSFMT_N: save unconverted words */
+    dsfmt->status[j] = array[j + size - DSFMT_N];
+  }
+  for (; i < size; i++, j++) { /* generate the rest; the state copy is taken before any conversion */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    dsfmt->status[j] = array[i];
+    convert_o0o1(&array[i - DSFMT_N]);
+  }
+  for (i = size - DSFMT_N; i < size; i++) { /* the last DSFMT_N words were never converted above */
+    convert_o0o1(&array[i]);
+  }
+  dsfmt->status[DSFMT_N] = lung; /* write the extra word back */
+}
+
+/**
+ * This function fills the user-specified array with double precision
+ * floating point pseudorandom numbers of the IEEE 754 format.
+ * @param dsfmt dsfmt state vector.
+ * @param array an 128-bit array to be filled by pseudorandom numbers.
+ * @param size number of 128-bit pseudorandom numbers to be generated.
+ */
+inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array,
+                                       int size) {
+  int i, j;
+  w128_t lung;
+
+  lung = dsfmt->status[DSFMT_N]; /* work on a local copy of the extra state word */
+  do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung);
+  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { /* both inputs still come from the saved state */
+    do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1],
+                 &lung);
+  }
+  for (; i < DSFMT_N; i++) { /* the POS1-offset input now comes from words already written to array */
+    do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N],
+                 &lung);
+  }
+  for (; i < size - DSFMT_N; i++) { /* both inputs are earlier array words */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    convert_o0c1(&array[i - DSFMT_N]); /* this word was just used as input for the last time, so it can become (0, 1] */
+  }
+  for (j = 0; j < 2 * DSFMT_N - size; j++) { /* runs only when size is below 2 * DSFMT_N: save unconverted words */
+    dsfmt->status[j] = array[j + size - DSFMT_N];
+  }
+  for (; i < size; i++, j++) { /* generate the rest; the state copy is taken before any conversion */
+    do_recursion(&array[i], &array[i - DSFMT_N],
+                 &array[i + DSFMT_POS1 - DSFMT_N], &lung);
+    dsfmt->status[j] = array[i];
+    convert_o0c1(&array[i - DSFMT_N]);
+  }
+  for (i = size - DSFMT_N; i < size; i++) { /* the last DSFMT_N words were never converted above */
+    convert_o0c1(&array[i]);
+  }
+  dsfmt->status[DSFMT_N] = lung; /* write the extra word back */
+}
+
+/**
+ * This function represents a function used in the initialization
+ * by init_by_array
+ * It XORs x with its top five bits (x >> 27) and multiplies the result
+ * by 1664525; the product is returned as a 32-bit value.
+ * @param x 32-bit integer
+ * @return 32-bit integer
+ */
+static uint32_t ini_func1(uint32_t x) {
+  return (x ^ (x >> 27)) * (uint32_t)1664525UL;
+}
+
+/**
+ * This function represents a function used in the initialization
+ * by init_by_array
+ * Same shape as ini_func1 but with the multiplier 1566083941.
+ * @param x 32-bit integer
+ * @return 32-bit integer
+ */
+static uint32_t ini_func2(uint32_t x) {
+  return (x ^ (x >> 27)) * (uint32_t)1566083941UL;
+}
+
+/**
+ * This function initializes the internal state array to fit the IEEE
+ * 754 format.
+ * Each of the first DSFMT_N * 2 64-bit words keeps the bits selected by
+ * DSFMT_LOW_MASK and gets the bits of DSFMT_HIGH_CONST OR-ed in. The
+ * extra word status[DSFMT_N] lies beyond that range and is not touched.
+ * @param dsfmt dsfmt state vector.
+ */
+static void initial_mask(dsfmt_t *dsfmt) {
+  int i;
+  uint64_t *psfmt;
+
+  psfmt = &dsfmt->status[0].u[0]; /* view the state as a flat array of 64-bit words */
+  for (i = 0; i < DSFMT_N * 2; i++) {
+    psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST;
+  }
+}
+
+/**
+ * This function certificate the period of 2^{SFMT_MEXP}-1.
+ * @param dsfmt dsfmt state vector.
+ */
+static void period_certification(dsfmt_t *dsfmt) {
+  uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2};
+  uint64_t tmp[2];
+  uint64_t inner;
+  int i;
+#if (DSFMT_PCV2 & 1) != 1
+  int j;
+  uint64_t work;
+#endif
+
+  tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); /* only the extra word status[DSFMT_N] is examined */
+  tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2);
+
+  inner = tmp[0] & pcv[0];
+  inner ^= tmp[1] & pcv[1];
+  for (i = 32; i > 0; i >>= 1) { /* fold 64 bits down to bit 0, which then holds the parity of the masked bits */
+    inner ^= inner >> i;
+  }
+  inner &= 1;
+  /* parity is 1: check OK, the state is left unchanged */
+  if (inner == 1) {
+    return;
+  }
+  /* check NG: flip a single state bit selected by the certification
+     vector so that the parity computed above becomes 1 */
+#if (DSFMT_PCV2 & 1) == 1
+  dsfmt->status[DSFMT_N].u[1] ^= 1; /* bit 0 of PCV2 is set, so flipping bit 0 of u[1] is enough */
+#else
+  for (i = 1; i >= 0; i--) { /* otherwise search pcv[1], then pcv[0], for the lowest set bit */
+    work = 1;
+    for (j = 0; j < 64; j++) {
+      if ((work & pcv[i]) != 0) {
+        dsfmt->status[DSFMT_N].u[i] ^= work;
+        return;
+      }
+      work = work << 1;
+    }
+  }
+#endif
+  return;
+}
+
+/*----------------
+  PUBLIC FUNCTIONS
+  ----------------*/
+/**
+ * This function returns the identification string.  The string shows
+ * the Mersenne exponent, and all parameters of this generator.
+ * The returned pointer refers to the string literal DSFMT_IDSTR and
+ * must not be modified or freed.
+ * @return id string.
+ */
+const char *dsfmt_get_idstring(void) { return DSFMT_IDSTR; }
+
+/**
+ * This function returns the minimum size of array used for \b
+ * fill_array functions.
+ * The value is DSFMT_N64, the same bound the fill_array functions
+ * assert on their size argument.
+ * @return minimum size of array used for fill_array functions.
+ */
+int dsfmt_get_min_array_size(void) { return DSFMT_N64; }
+
+/**
+ * This function fills the internal state array with double precision
+ * floating point pseudorandom numbers of the IEEE 754 format.
+ * @param dsfmt dsfmt state vector.
+ */
+void dsfmt_gen_rand_all(dsfmt_t *dsfmt) {
+  int i;
+  w128_t lung;
+
+  lung = dsfmt->status[DSFMT_N]; /* work on a local copy of the extra state word */
+  do_recursion(&dsfmt->status[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1],
+               &lung);
+  for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { /* the POS1-offset input has not been regenerated yet */
+    do_recursion(&dsfmt->status[i], &dsfmt->status[i],
+                 &dsfmt->status[i + DSFMT_POS1], &lung);
+  }
+  for (; i < DSFMT_N; i++) { /* the POS1-offset index wraps to words regenerated earlier in this call */
+    do_recursion(&dsfmt->status[i], &dsfmt->status[i],
+                 &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung);
+  }
+  dsfmt->status[DSFMT_N] = lung; /* write the extra word back; idx is not modified here */
+}
+
+/**
+ * This function generates double precision floating point
+ * pseudorandom numbers which distribute in the range [1, 2) to the
+ * specified array[] by one call. The number of pseudorandom numbers
+ * is specified by the argument \b size, which must be at least (SFMT_MEXP
+ * / 128) * 2 and a multiple of two. The function
+ * get_min_array_size() returns this minimum size.  The generation by
+ * this function is much faster than the following fill_array_xxx functions.
+ *
+ * For initialization, init_gen_rand() or init_by_array() must be called
+ * before the first call of this function. This function can not be
+ * used after calling genrand_xxx functions, without initialization.
+ *
+ * @param dsfmt dsfmt state vector.
+ * @param array an array where pseudorandom numbers are filled
+ * by this function.  The pointer to the array must be "aligned"
+ * (namely, must be a multiple of 16) in the SIMD version, since it
+ * refers to the address of a 128-bit integer.  In the standard C
+ * version, the pointer is arbitrary.
+ *
+ * @param size the number of 64-bit pseudorandom integers to be
+ * generated.  size must be a multiple of 2, and greater than or equal
+ * to (SFMT_MEXP / 128) * 2.
+ *
+ * @note \b memalign or \b posix_memalign is available to get aligned
+ * memory. Mac OSX doesn't have these functions, but \b malloc of OSX
+ * returns the pointer to the aligned memory block.
+ */
+void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) {
+  assert(size % 2 == 0); /* two doubles per 128-bit word */
+  assert(size >= DSFMT_N64); /* same bound dsfmt_get_min_array_size() reports */
+  gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2); /* the helper counts 128-bit words, hence size / 2 */
+}
+
+/**
+ * This function generates double precision floating point
+ * pseudorandom numbers which distribute in the range (0, 1] to the
+ * specified array[] by one call. This function is the same as
+ * fill_array_close1_open2() except the distribution range.
+ *
+ * The size preconditions (even, at least DSFMT_N64) are checked with
+ * assert() only, so they are not enforced when NDEBUG is defined.
+ *
+ * @param dsfmt dsfmt state vector.
+ * @param array an array where pseudorandom numbers are filled
+ * by this function.
+ * @param size the number of pseudorandom numbers to be generated.
+ * see also \sa fill_array_close1_open2()
+ */
+void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) {
+  assert(size % 2 == 0);
+  assert(size >= DSFMT_N64);
+  gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2); /* the helper counts 128-bit words, hence size / 2 */
+}
+
+/**
+ * This function generates double precision floating point
+ * pseudorandom numbers which distribute in the range [0, 1) to the
+ * specified array[] by one call. This function is the same as
+ * fill_array_close1_open2() except the distribution range.
+ *
+ * The size preconditions (even, at least DSFMT_N64) are checked with
+ * assert() only, so they are not enforced when NDEBUG is defined.
+ *
+ * @param dsfmt dsfmt state vector.
+ * @param array an array where pseudorandom numbers are filled
+ * by this function.
+ * @param size the number of pseudorandom numbers to be generated.
+ * see also \sa fill_array_close1_open2()
+ */
+void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) {
+  assert(size % 2 == 0);
+  assert(size >= DSFMT_N64);
+  gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2); /* the helper counts 128-bit words, hence size / 2 */
+}
+
+/**
+ * This function generates double precision floating point
+ * pseudorandom numbers which distribute in the range (0, 1) to the
+ * specified array[] by one call. This function is the same as
+ * fill_array_close1_open2() except the distribution range.
+ *
+ * @param dsfmt dsfmt state vector.
+ * @param array an array where pseudorandom numbers are filled
+ * by this function.
+ * @param size the number of pseudorandom numbers to be generated.
+ * see also \sa fill_array_close1_open2()
+ */
+void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) {
+  assert(size % 2 == 0); /* two doubles per 128-bit word */
+  assert(size >= DSFMT_N64); /* same bound dsfmt_get_min_array_size() reports */
+  gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2); /* the helper counts 128-bit words, hence size / 2 */
+}
+
+#if defined(__INTEL_COMPILER)
+#pragma warning(disable : 981)
+#endif
+/**
+ * This function initializes the internal state array with a 32-bit
+ * integer seed.
+ * All (DSFMT_N + 1) * 4 32-bit words of the state are filled from the
+ * seed, then initial_mask() and period_certification() are applied and
+ * idx is set to DSFMT_N64.
+ * If mexp differs from the DSFMT_MEXP this file was compiled with, a
+ * message is printed to stderr and the process exits with status 1.
+ * @param dsfmt dsfmt state vector.
+ * @param seed a 32-bit integer used as the seed.
+ * @param mexp caller's Mersenne exponent
+ */
+void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) {
+  int i;
+  uint32_t *psfmt;
+
+  /* make sure caller program is compiled with the same MEXP */
+  if (mexp != dsfmt_mexp) {
+    fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n");
+    exit(1);
+  }
+  psfmt = &dsfmt->status[0].u32[0]; /* view the state as a flat array of 32-bit words */
+  psfmt[idxof(0)] = seed;
+  for (i = 1; i < (DSFMT_N + 1) * 4; i++) { /* each word is derived from the previous one; the result is stored as 32 bits */
+    psfmt[idxof(i)] =
+        1812433253UL * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i;
+  }
+  initial_mask(dsfmt);
+  period_certification(dsfmt);
+  dsfmt->idx = DSFMT_N64;
+}
+
+/**
+ * This function initializes the internal state array,
+ * with an array of 32-bit integers used as the seeds
+ * @param dsfmt dsfmt state vector.
+ * @param init_key the array of 32-bit integers, used as a seed.
+ * @param key_length the length of init_key.
+ * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp) { + int i, j, count; + uint32_t r; + uint32_t *psfmt32; + int lag; + int mid; + int size = (DSFMT_N + 1) * 4; /* pulmonary */ + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; + + psfmt32 = &dsfmt->status[0].u32[0]; + memset(dsfmt->status, 0x8b, sizeof(dsfmt->status)); + if (key_length + 1 > size) { + count = key_length + 1; + } else { + count = size; + } + r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)] ^ + psfmt32[idxof((size - 1) % size)]); + psfmt32[idxof(mid % size)] += r; + r += key_length; + psfmt32[idxof((mid + lag) % size)] += r; + psfmt32[idxof(0)] = r; + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = ini_func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % size)] ^ + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (; j < count; j++) { + r = ini_func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % size)] ^ + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (j = 0; j < size; j++) { + r = ini_func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % size)] + + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % size)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + initial_mask(dsfmt); + period_certification(dsfmt); + 
dsfmt->idx = DSFMT_N64; +} +#if defined(__INTEL_COMPILER) +#pragma warning(default : 981) +#endif + +#if defined(__cplusplus) +} +#endif + +extern inline double dsfmt_next_double(dsfmt_state *state); + +extern inline uint64_t dsfmt_next64(dsfmt_state *state); + +extern inline uint32_t dsfmt_next32(dsfmt_state *state); + +void dsfmt_jump(dsfmt_state *state) { dSFMT_jump(state->state, poly_128); };
\ No newline at end of file diff --git a/numpy/random/src/dsfmt/dSFMT.h b/numpy/random/src/dsfmt/dSFMT.h new file mode 100644 index 000000000..224d0108f --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT.h @@ -0,0 +1,691 @@ +#pragma once +/** + * @file dSFMT.h + * + * @brief double precision SIMD oriented Fast Mersenne Twister(dSFMT) + * pseudorandom number generator based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007, 2008 Mutsuo Saito, Makoto Matsumoto and + * Hiroshima University. All rights reserved. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + * + * @note We assume that your system has inttypes.h. If your system + * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, + * and you have to define PRIu64 and PRIx64 in this file as follows: + * @verbatim + typedef unsigned int uint32_t + typedef unsigned long long uint64_t + #define PRIu64 "llu" + #define PRIx64 "llx" +@endverbatim + * uint32_t must be exactly 32-bit unsigned integer type (no more, no + * less), and uint64_t must be exactly 64-bit unsigned integer type. + * PRIu64 and PRIx64 are used for printf function to print 64-bit + * unsigned int and 64-bit unsigned int in hexadecimal format. + */ + +#ifndef DSFMT_H +#define DSFMT_H +#if defined(__cplusplus) +extern "C" { +#endif + +#include <assert.h> +#include <stdio.h> + +#if !defined(DSFMT_MEXP) +#ifdef __GNUC__ +#warning "DSFMT_MEXP is not defined. I assume DSFMT_MEXP is 19937." +#endif +#define DSFMT_MEXP 19937 +#endif +/*----------------- + BASIC DEFINITIONS + -----------------*/ +/* Mersenne Exponent. The period of the sequence + * is a multiple of 2^DSFMT_MEXP-1. 
+ * #define DSFMT_MEXP 19937 */ +/** DSFMT generator has an internal state array of 128-bit integers, + * and N is its size. */ +#define DSFMT_N ((DSFMT_MEXP - 128) / 104 + 1) +/** N32 is the size of internal state array when regarded as an array + * of 32-bit integers.*/ +#define DSFMT_N32 (DSFMT_N * 4) +/** N64 is the size of internal state array when regarded as an array + * of 64-bit integers.*/ +#define DSFMT_N64 (DSFMT_N * 2) + +#if !defined(DSFMT_BIG_ENDIAN) +#if defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) +#if __BYTE_ORDER == __BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) +#if _BYTE_ORDER == _BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(__BYTE_ORDER__) && defined(__BIG_ENDIAN__) +#if __BYTE_ORDER__ == __BIG_ENDIAN__ +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +#if BYTE_ORDER == BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || \ + defined(__BIG_ENDIAN__) || defined(BIG_ENDIAN) +#define DSFMT_BIG_ENDIAN 1 +#endif +#endif + +#if defined(DSFMT_BIG_ENDIAN) && defined(__amd64) +#undef DSFMT_BIG_ENDIAN +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include <inttypes.h> +#elif defined(_MSC_VER) || defined(__BORLANDC__) +#if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +#ifndef UINT64_C +#define UINT64_C(v) (v##ui64) +#endif +#define DSFMT_UINT32_DEFINED +#if !defined(inline) && !defined(__cplusplus) +#define inline __forceinline +#endif +#endif +#else +#include <inttypes.h> +#if !defined(inline) && !defined(__cplusplus) +#if defined(__GNUC__) +#define inline __forceinline__ +#else +#define inline +#endif +#endif +#endif + +#ifndef PRIu64 +#if defined(_MSC_VER) || defined(__BORLANDC__) +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#else +#define PRIu64 "llu" +#define PRIx64 "llx" 
+#endif +#endif + +#ifndef UINT64_C +#define UINT64_C(v) (v##ULL) +#endif + +/*------------------------------------------ + 128-bit SIMD like data type for standard C + ------------------------------------------*/ +#if defined(HAVE_ALTIVEC) +#if !defined(__APPLE__) +#include <altivec.h> +#endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; + +#elif defined(HAVE_SSE2) +#include <emmintrin.h> + +/** 128-bit data structure */ +union W128_T { + __m128i si; + __m128d sd; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#else /* standard C */ +/** 128-bit data structure */ +union W128_T { + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#endif + +/** 128-bit data type */ +typedef union W128_T w128_t; + +/** the 128-bit internal state array */ +struct DSFMT_T { + w128_t status[DSFMT_N + 1]; + int idx; +}; +typedef struct DSFMT_T dsfmt_t; + +/** dsfmt internal state vector */ +extern dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +extern const int dsfmt_global_mexp; + +void dsfmt_gen_rand_all(dsfmt_t *dsfmt); +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp); +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp); +const char *dsfmt_get_idstring(void); +int dsfmt_get_min_array_size(void); + +#if defined(__GNUC__) +#define DSFMT_PRE_INLINE inline static +#define DSFMT_PST_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && _MSC_VER >= 1200 +#define DSFMT_PRE_INLINE __forceinline static +#define DSFMT_PST_INLINE +#else +#define DSFMT_PRE_INLINE inline static +#define DSFMT_PST_INLINE +#endif 
+DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_close_open(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_open_close(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_open_open(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE uint32_t dsfmt_gv_genrand_uint32(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void +dsfmt_gv_fill_array_close1_open2(double array[], int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_gen_rand(dsfmt_t *dsfmt, + uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. 
+ * @param dsfmt dsfmt internal state data + * @return 32-bit unsigned integer pseudorandom number + */ +inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) { + uint32_t r; + uint64_t *psfmt64 = &dsfmt->status[0].u[0]; + + /* every buffered 64-bit word has been handed out: regenerate and rewind */ + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + /* only the low 32 bits of the next state word are returned */ + r = psfmt64[dsfmt->idx++] & 0xffffffffU; + return r; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). This is + * the primitive and faster than generating numbers in other ranges. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state data + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) { + double r; + double *psfmt64 = &dsfmt->status[0].d[0]; + + /* every buffered double has been handed out: regenerate and rewind */ + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++]; + return r; +} + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return 32-bit unsigned integer pseudorandom number + */ +inline static uint32_t dsfmt_gv_genrand_uint32(void) { + return dsfmt_genrand_uint32(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables.
+ * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close1_open2(void) { + return dsfmt_genrand_close1_open2(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close_open(dsfmt_t *dsfmt) { + return dsfmt_genrand_close1_open2(dsfmt) - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close_open(void) { + return dsfmt_gv_genrand_close1_open2() - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_close(dsfmt_t *dsfmt) { + return 2.0 - dsfmt_genrand_close1_open2(dsfmt); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. 
+ * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_close(void) { + return 2.0 - dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) { + double *dsfmt64 = &dsfmt->status[0].d[0]; + union { + double d; + uint64_t u; + } r; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r.d = dsfmt64[dsfmt->idx++]; + r.u |= 1; + return r.d - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_open(void) { + return dsfmt_genrand_open_open(&dsfmt_global_data); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. This function is the same as + * dsfmt_fill_array_close1_open2() except that this function uses + * \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. 
+ * see also \sa dsfmt_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close1_open2(double array[], int size) { + dsfmt_fill_array_close1_open2(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() and \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_close(double array[], int size) { + dsfmt_fill_array_open_close(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close_open(double array[], int size) { + dsfmt_fill_array_close_open(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. 
+ * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_open(double array[], int size) { + dsfmt_fill_array_open_open(&dsfmt_global_data, array, size); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + */ +inline static void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) { + dsfmt_chk_init_gen_rand(dsfmt, seed, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. This function uses \b global variables. + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_init_gen_rand() + */ +inline static void dsfmt_gv_init_gen_rand(uint32_t seed) { + dsfmt_init_gen_rand(&dsfmt_global_data, seed); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * @param dsfmt dsfmt state vector + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + */ +inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) { + dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * This function uses \b global variables. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. 
+ * see also \sa dsfmt_init_by_array() + */ +inline static void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) { + dsfmt_init_by_array(&dsfmt_global_data, init_key, key_length); +} + +#if !defined(DSFMT_DO_NOT_USE_OLD_NAMES) +DSFMT_PRE_INLINE const char *get_idstring(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE int get_min_array_size(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_close(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], + int size) DSFMT_PST_INLINE; + +/** + * This function is just the same as dsfmt_get_idstring(). + * @return id string. + * see also \sa dsfmt_get_idstring() + */ +inline static const char *get_idstring(void) { return dsfmt_get_idstring(); } + +/** + * This function is just the same as dsfmt_get_min_array_size(). + * @return minimum size of array used for fill_array functions. + * see also \sa dsfmt_get_min_array_size() + */ +inline static int get_min_array_size(void) { + return dsfmt_get_min_array_size(); +} + +/** + * This function is just the same as dsfmt_gv_init_gen_rand(). + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_gv_init_gen_rand(), \sa dsfmt_init_gen_rand(). 
+ */ +inline static void init_gen_rand(uint32_t seed) { + dsfmt_gv_init_gen_rand(seed); +} + +/** + * This function is just the same as dsfmt_gv_init_by_array(). + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_gv_init_by_array(), \sa dsfmt_init_by_array(). + */ +inline static void init_by_array(uint32_t init_key[], int key_length) { + dsfmt_gv_init_by_array(init_key, key_length); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close1_open2(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close1_open2() \sa + * dsfmt_gv_genrand_close1_open2() + */ +inline static double genrand_close1_open2(void) { + return dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close_open() \sa + * dsfmt_gv_genrand_close_open() + */ +inline static double genrand_close_open(void) { + return dsfmt_gv_genrand_close_open(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_close(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_close() \sa + * dsfmt_gv_genrand_open_close() + */ +inline static double genrand_open_close(void) { + return dsfmt_gv_genrand_open_close(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_open() \sa + * dsfmt_gv_genrand_open_open() + */ +inline static double genrand_open_open(void) { + return dsfmt_gv_genrand_open_open(); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_close(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. 
+ * see also \sa dsfmt_gv_fill_array_open_close(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_close(double array[], int size) { + dsfmt_gv_fill_array_open_close(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_close_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close_open(double array[], int size) { + dsfmt_gv_fill_array_close_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_open(double array[], int size) { + dsfmt_gv_fill_array_open_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close1_open2(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. 
+ * see also \sa dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close1_open2(double array[], int size) { + dsfmt_gv_fill_array_close1_open2(array, size); +} +#endif /* DSFMT_DO_NOT_USE_OLD_NAMES */ + +#if defined(__cplusplus) +} +#endif + +#endif /* DSFMT_H */ + +/* + * NOTE(review): everything from here on sits after the DSFMT_H guard has + * been closed; confirm that double inclusion is prevented by other means. + */ + +/** Lets one generated double be read back as its raw 64-bit pattern. */ +union random_val_t { + double d; + uint64_t u64; +}; + +/** + * Wrapper state: a dsfmt_t generator plus a buffer of pre-generated doubles. + * buffer_loc is the index of the next unread entry of buffered_uniforms; + * the helpers below refill the buffer with DSFMT_N64 values at a time, so + * buffered_uniforms must have room for at least DSFMT_N64 doubles. + * has_uint32 and uinteger are not touched by the helpers below. + */ +typedef struct s_dsfmt_state { + dsfmt_t *state; + int has_uint32; + uint32_t uinteger; + + double *buffered_uniforms; + int buffer_loc; +} dsfmt_state; + +/** + * Returns the next buffered double. Once buffer_loc has reached DSFMT_N64 + * the buffer is refilled through dsfmt_fill_array_close1_open2() and its + * first entry is returned. + */ +static inline double dsfmt_next_buffer(dsfmt_state *state) { + if (state->buffer_loc < DSFMT_N64) { + double out = state->buffered_uniforms[state->buffer_loc]; + state->buffer_loc++; + return out; + } + dsfmt_fill_array_close1_open2(state->state, state->buffered_uniforms, + DSFMT_N64); + /* entry 0 is returned right away, so the next unread entry is 1 */ + state->buffer_loc = 1; + return state->buffered_uniforms[0]; +} + +/** Returns the next buffered double minus 1.0. */ +static inline double dsfmt_next_double(dsfmt_state *state) { + return dsfmt_next_buffer(state) - 1.0; +} + +/** + * Builds a 64-bit integer from two buffered doubles: bits 16..47 of the + * first one become the high word, bits 16..47 of the second the low word. + */ +static inline uint64_t dsfmt_next64(dsfmt_state *state) { + /* Discard bottom 16 bits */ + uint64_t out; + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + out = (rv.u64 >> 16) << 32; + rv.d = dsfmt_next_buffer(state); + out |= (rv.u64 >> 16) & 0xffffffff; + return out; +} + +/** Returns bits 16..47 of the next buffered double as a 32-bit integer. */ +static inline uint32_t dsfmt_next32(dsfmt_state *state) { + /* Discard bottom 16 bits */ + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + return (uint32_t)((rv.u64 >> 16) & 0xffffffff); +} + +/** Returns the unmodified 64-bit pattern of the next buffered double. */ +static inline uint64_t dsfmt_next_raw(dsfmt_state *state) { + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + return rv.u64; +} + +/** Defined in dSFMT.c. */ +void dsfmt_jump(dsfmt_state *state);
\ No newline at end of file diff --git a/numpy/random/src/entropy/LICENSE.md b/numpy/random/src/entropy/LICENSE.md new file mode 100644 index 000000000..b7276aad7 --- /dev/null +++ b/numpy/random/src/entropy/LICENSE.md @@ -0,0 +1,25 @@ +# ENTROPY + +_Parts of this module were derived from PCG_ + + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/entropy/entropy.c b/numpy/random/src/entropy/entropy.c new file mode 100644 index 000000000..ead4bef83 --- /dev/null +++ b/numpy/random/src/entropy/entropy.c @@ -0,0 +1,174 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* This code provides a mechanism for getting external randomness for + * seeding purposes. Usually, it's just a wrapper around reading + * /dev/random. + * + * Alas, because not every system provides /dev/random, we need a fallback. + * We also need to try to test whether or not to use the fallback. + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "entropy.h" +#ifdef _WIN32 +/* Windows */ +#include <sys/timeb.h> +#include <time.h> +#include <windows.h> + +#include <wincrypt.h> +#else +/* Unix */ +#include <sys/time.h> +#include <time.h> +#include <unistd.h> +#endif + +#ifndef IS_UNIX +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || \ + (defined(__APPLE__) && defined(__MACH__))) +#define IS_UNIX 1 +#else +#define IS_UNIX 0 +#endif +#endif + +// If HAVE_DEV_RANDOM is set, we use that value, otherwise we guess +#ifndef HAVE_DEV_RANDOM +#define HAVE_DEV_RANDOM IS_UNIX +#endif + +#if HAVE_DEV_RANDOM +#include <fcntl.h> +#include <unistd.h> +#endif + +#if HAVE_DEV_RANDOM +/* entropy_getbytes(dest, size): + * Use /dev/random to get some external entropy for seeding purposes. + * + * Note: + * If reading /dev/random fails (which ought to never happen), it returns + * false, otherwise it returns true. If it fails, you could instead call + * fallback_entropy_getbytes which always succeeds. 
+ */
+
+bool entropy_getbytes(void *dest, size_t size) {
+  int fd = open("/dev/urandom", O_RDONLY);
+  if (fd < 0)
+    return false;
+  ssize_t sz = read(fd, dest, size);
+  if ((sz < 0) || ((size_t)sz < size)) {
+    /* Failed or short read: do not leak the descriptor. */
+    close(fd);
+    return false;
+  }
+  return close(fd) == 0;
+}
+#endif
+
+#ifdef _WIN32
+/* entropy_getbytes(dest, size):
+ * Windows variant: fills dest with size bytes from CryptGenRandom.
+ * Returns false when the provider cannot be acquired or generation fails,
+ * so that entropy_fill() switches to the fallback instead of leaving dest
+ * unfilled.
+ */
+bool entropy_getbytes(void *dest, size_t size) {
+  HCRYPTPROV hCryptProv;
+  BOOL done;
+
+  if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL,
+                           CRYPT_VERIFYCONTEXT) ||
+      !hCryptProv) {
+    return false;
+  }
+  done = CryptGenRandom(hCryptProv, (DWORD)size, (unsigned char *)dest);
+  CryptReleaseContext(hCryptProv, 0);
+  if (!done) {
+    return false;
+  }
+
+  return true;
+}
+#endif
+
+/* Thomas Wang 32/64 bits integer hash function */
+uint32_t entropy_hash_32(uint32_t key) {
+  key += ~(key << 15);
+  key ^= (key >> 10);
+  key += (key << 3);
+  key ^= (key >> 6);
+  key += ~(key << 11);
+  key ^= (key >> 16);
+  return key;
+}
+
+uint64_t entropy_hash_64(uint64_t key) {
+  key = (~key) + (key << 21); // key = (key << 21) - key - 1;
+  key = key ^ (key >> 24);
+  key = (key + (key << 3)) + (key << 8); // key * 265
+  key = key ^ (key >> 14);
+  key = (key + (key << 2)) + (key << 4); // key * 21
+  key = key ^ (key >> 28);
+  key = key + (key << 31);
+  return key;
+}
+
+/* entropy_randombytes():
+ * Returns a 32-bit value built by hashing the process id, the current time
+ * and clock() with entropy_hash_32 and XOR-ing the results. The Windows
+ * branch additionally mixes in the performance counter when available.
+ */
+uint32_t entropy_randombytes(void) {
+
+#ifndef _WIN32
+  struct timeval tv;
+  gettimeofday(&tv, NULL);
+  return entropy_hash_32(getpid()) ^ entropy_hash_32(tv.tv_sec) ^
+         entropy_hash_32(tv.tv_usec) ^ entropy_hash_32(clock());
+#else
+  uint32_t out = 0;
+  int64_t counter;
+  struct _timeb tv;
+  _ftime_s(&tv);
+  out = entropy_hash_32(GetCurrentProcessId()) ^
+        entropy_hash_32((uint32_t)tv.time) ^ entropy_hash_32(tv.millitm) ^
+        entropy_hash_32(clock());
+  if (QueryPerformanceCounter((LARGE_INTEGER *)&counter) != 0)
+    out ^= entropy_hash_32((uint32_t)(counter & 0xffffffff));
+  return out;
+#endif
+}
+
+/* entropy_fallback_getbytes(dest, size):
+ * Fills dest with size bytes taken from successive entropy_randombytes()
+ * values, four bytes per value (fewer for a trailing partial word).
+ * Needs no allocation and always returns true.
+ */
+bool entropy_fallback_getbytes(void *dest, size_t size) {
+  unsigned char *out = dest;
+
+  while (size > 0) {
+    uint32_t hash = entropy_randombytes();
+    size_t n = size < sizeof(hash) ? size : sizeof(hash);
+    memcpy(out, &hash, n);
+    out += n;
+    size -= n;
+  }
+  return true;
+}
+
+/* entropy_fill(dest, size):
+ * Fills dest using entropy_getbytes() and, if that reports failure,
+ * entropy_fallback_getbytes().
+ */
+void entropy_fill(void *dest, size_t size) {
+  bool success;
+  success = entropy_getbytes(dest, size);
+  if (!success) {
+    entropy_fallback_getbytes(dest, size);
+  }
+}
diff --git a/numpy/random/src/entropy/entropy.h b/numpy/random/src/entropy/entropy.h
new file mode 100644
index 000000000..785603dd3
--- /dev/null
+++ b/numpy/random/src/entropy/entropy.h
@@ -0,0 +1,48 @@
+#ifndef _RANDOMDGEN__ENTROPY_H_
+#define _RANDOMDGEN__ENTROPY_H_
+/*
+ * PCG Random Number Generation for C.
+ *
+ * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org>
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#include <stddef.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +typedef int bool; +#define false 0 +#define true 1 +#else +#include <stdbool.h> +#include <stdint.h> +#endif +#else +#include <stdbool.h> +#include <stdint.h> +#endif + +extern void entropy_fill(void *dest, size_t size); + +extern bool entropy_getbytes(void *dest, size_t size); + +extern bool entropy_fallback_getbytes(void *dest, size_t size); + +#endif diff --git a/numpy/random/src/legacy/LICENSE.md b/numpy/random/src/legacy/LICENSE.md new file mode 100644 index 000000000..88b1791b2 --- /dev/null +++ b/numpy/random/src/legacy/LICENSE.md @@ -0,0 +1,30 @@ +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
/*
 * Legacy distribution samplers.
 *
 * Every function here draws from the basic RNG held in `aug_state` (and,
 * for normals, from the one-value cache stored next to it).  The number and
 * order of draws is part of the observable behaviour, so statements must not
 * be reordered.
 */

/* Draw one double from the wrapped basic RNG via its next_double callback. */
static NPY_INLINE double legacy_double(aug_brng_t *aug_state) {
  return aug_state->basicrng->next_double(aug_state->basicrng->state);
}

/*
 * Return one normal variate.
 *
 * Variates are produced in pairs: one is returned immediately and the other
 * is stored in aug_state->gauss with has_gauss set, to be handed out (and
 * the cache cleared) on the next call.
 */
double legacy_gauss(aug_brng_t *aug_state) {
  if (aug_state->has_gauss) {
    /* Serve the cached value and clear the cache. */
    const double temp = aug_state->gauss;
    aug_state->has_gauss = false;
    aug_state->gauss = 0.0;
    return temp;
  } else {
    double f, x1, x2, r2;

    /* Reject points outside the unit disc, and the origin itself. */
    do {
      x1 = 2.0 * legacy_double(aug_state) - 1.0;
      x2 = 2.0 * legacy_double(aug_state) - 1.0;
      r2 = x1 * x1 + x2 * x2;
    } while (r2 >= 1.0 || r2 == 0.0);

    /* Polar method, a more efficient version of the Box-Muller approach. */
    f = sqrt(-2.0 * log(r2) / r2);
    /* Keep for next call */
    aug_state->gauss = f * x1;
    aug_state->has_gauss = true;
    return f * x2;
  }
}

/* Return -log(1 - U) for a single uniform draw U. */
double legacy_standard_exponential(aug_brng_t *aug_state) {
  /* We use -log(1-U) since U is [0, 1) */
  return -log(1.0 - legacy_double(aug_state));
}

/*
 * Return a gamma variate with the given shape and unit scale.
 *
 * shape == 1 delegates to legacy_standard_exponential(); shape == 0 returns
 * 0 without consuming any random numbers.  The two remaining branches are
 * accept/reject loops: shape < 1 uses one uniform and one exponential per
 * attempt, shape > 1 uses normals from legacy_gauss() plus one uniform.
 * NOTE(review): a NaN shape fails all three comparisons and therefore takes
 * the final branch -- confirm callers validate shape beforehand.
 */
double legacy_standard_gamma(aug_brng_t *aug_state, double shape) {
  double b, c;
  double U, V, X, Y;

  if (shape == 1.0) {
    return legacy_standard_exponential(aug_state);
  }
  else if (shape == 0.0) {
    return 0.0;
  } else if (shape < 1.0) {
    for (;;) {
      U = legacy_double(aug_state);
      V = legacy_standard_exponential(aug_state);
      if (U <= 1.0 - shape) {
        X = pow(U, 1. / shape);
        if (X <= V) {
          return X;
        }
      } else {
        Y = -log((1 - U) / shape);
        X = pow(1.0 - shape + shape * Y, 1. / shape);
        if (X <= (V + Y)) {
          return X;
        }
      }
    }
  } else {
    b = shape - 1. / 3.;
    c = 1. / sqrt(9 * b);
    for (;;) {
      /* Redraw the normal until 1 + c*X is strictly positive. */
      do {
        X = legacy_gauss(aug_state);
        V = 1.0 + c * X;
      } while (V <= 0.0);

      V = V * V * V;
      U = legacy_double(aug_state);
      /* Cheap acceptance test first; the log-based test only if it fails. */
      if (U < 1.0 - 0.0331 * (X * X) * (X * X))
        return (b * V);
      if (log(U) < 0.5 * X * X + b * (1. - V + log(V)))
        return (b * V);
    }
  }
}

/* Standard gamma variate multiplied by `scale`. */
double legacy_gamma(aug_brng_t *aug_state, double shape, double scale) {
  return scale * legacy_standard_gamma(aug_state, shape);
}

/* Return exp(E / a) - 1 for a standard exponential draw E. */
double legacy_pareto(aug_brng_t *aug_state, double a) {
  return exp(legacy_standard_exponential(aug_state) / a) - 1;
}

/*
 * Return E^(1/a) for a standard exponential draw E.  a == 0 returns 0
 * without consuming any random numbers.
 */
double legacy_weibull(aug_brng_t *aug_state, double a) {
  if (a == 0.0) {
    return 0.0;
  }
  return pow(legacy_standard_exponential(aug_state), 1. / a);
}

/* Return (1 - exp(-E))^(1/a) for a standard exponential draw E. */
double legacy_power(aug_brng_t *aug_state, double a) {
  return pow(1 - exp(-legacy_standard_exponential(aug_state)), 1. / a);
}

/* Chi-square variate: twice a standard gamma with shape df / 2. */
double legacy_chisquare(aug_brng_t *aug_state, double df) {
  return 2.0 * legacy_standard_gamma(aug_state, df / 2.0);
}

/*
 * Noncentral chi-square variate.
 *
 * nonc == 0 reduces to legacy_chisquare().  For df > 1 the result is a
 * chi-square with df - 1 degrees of freedom plus the square of a normal
 * shifted by sqrt(nonc).  Otherwise a Poisson count i with mean nonc / 2 is
 * drawn and a chi-square with df + 2*i degrees of freedom is returned.
 */
double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df,
                                   double nonc) {
  double out;
  if (nonc == 0) {
    return legacy_chisquare(aug_state, df);
  }
  if (1 < df) {
    const double Chi2 = legacy_chisquare(aug_state, df - 1);
    const double n = legacy_gauss(aug_state) + sqrt(nonc);
    return Chi2 + n * n;
  } else {
    const long i = random_poisson(aug_state->basicrng, nonc / 2.0);
    out = legacy_chisquare(aug_state, df + 2 * i);
    /* Insert nan guard here to avoid changing the stream */
    if (npy_isnan(nonc)){
      return NPY_NAN;
    } else {
      return out;
    }
  }
}

/*
 * Noncentral F variate: (noncentral chi-square(dfnum, nonc) * dfden) divided
 * by (chi-square(dfden) * dfnum).  The numerator is drawn first.
 */
double legacy_noncentral_f(aug_brng_t *aug_state, double dfnum, double dfden,
                           double nonc) {
  double t = legacy_noncentral_chisquare(aug_state, dfnum, nonc) * dfden;
  return t / (legacy_chisquare(aug_state, dfden) * dfnum);
}

/*
 * Wald variate for the given mean and scale.  Consumes one normal draw and
 * then one uniform draw; the uniform decides between returning X and
 * mean^2 / X.
 */
double legacy_wald(aug_brng_t *aug_state, double mean, double scale) {
  double U, X, Y;
  double mu_2l;

  mu_2l = mean / (2 * scale);
  Y = legacy_gauss(aug_state);
  Y = mean * Y * Y;
  X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y));
  U = legacy_double(aug_state);
  if (U <= mean / (mean + X)) {
    return X;
  } else {
    return mean * mean / X;
  }
}

/* Return loc + scale * legacy_gauss(). */
double legacy_normal(aug_brng_t *aug_state, double loc, double scale) {
  return loc + scale * legacy_gauss(aug_state);
}

/* Return exp() of a legacy_normal(mean, sigma) draw. */
double legacy_lognormal(aug_brng_t *aug_state, double mean, double sigma) {
  return exp(legacy_normal(aug_state, mean, sigma));
}

/*
 * Student-t variate with df degrees of freedom, built from one normal draw
 * followed by one standard gamma draw with shape df / 2.
 */
double legacy_standard_t(aug_brng_t *aug_state, double df) {
  double num, denom;

  num = legacy_gauss(aug_state);
  denom = legacy_standard_gamma(aug_state, df / 2);
  return sqrt(df / 2) * num / sqrt(denom);
}

/*
 * Negative binomial variate: a Poisson draw whose mean is itself a gamma
 * variate with shape n and scale (1 - p) / p.
 */
int64_t legacy_negative_binomial(aug_brng_t *aug_state, double n, double p) {
  double Y = legacy_gamma(aug_state, n, (1 - p) / p);
  return random_poisson(aug_state->basicrng, Y);
}

/*
 * Ratio of two legacy_gauss() draws.
 * NOTE(review): C leaves the evaluation order of the two operands of `/`
 * unspecified, so which draw becomes the numerator is compiler-dependent.
 */
double legacy_standard_cauchy(aug_brng_t *aug_state) {
  return legacy_gauss(aug_state) / legacy_gauss(aug_state);
}

/*
 * Beta variate with parameters a and b.
 *
 * When both a and b are <= 1 an accept/reject loop on two uniforms is used;
 * otherwise the result is Ga / (Ga + Gb) from two standard gamma draws.
 */
double legacy_beta(aug_brng_t *aug_state, double a, double b) {
  double Ga, Gb;

  if ((a <= 1.0) && (b <= 1.0)) {
    double U, V, X, Y;
    /* Use Johnk's algorithm */

    while (1) {
      U = legacy_double(aug_state);
      V = legacy_double(aug_state);
      X = pow(U, 1.0 / a);
      Y = pow(V, 1.0 / b);

      if ((X + Y) <= 1.0) {
        if (X + Y > 0) {
          return X / (X + Y);
        } else {
          /* X + Y is not positive here, so the ratio is recomputed from
           * logarithms, shifted by the larger of the two. */
          double logX = log(U) / a;
          double logY = log(V) / b;
          double logM = logX > logY ? logX : logY;
          logX -= logM;
          logY -= logM;

          return exp(logX - log(exp(logX) + exp(logY)));
        }
      }
    }
  } else {
    Ga = legacy_standard_gamma(aug_state, a);
    Gb = legacy_standard_gamma(aug_state, b);
    return Ga / (Ga + Gb);
  }
}

/*
 * F variate: (chi-square(dfnum) * dfden) / (chi-square(dfden) * dfnum).
 * NOTE(review): the two chi-square draws are operands of a single `/`, so
 * the order in which they are drawn is not fixed by the C standard.
 */
double legacy_f(aug_brng_t *aug_state, double dfnum, double dfden) {
  return ((legacy_chisquare(aug_state, dfnum) * dfden) /
          (legacy_chisquare(aug_state, dfden) * dfnum));
}

/* Standard exponential variate multiplied by `scale`. */
double legacy_exponential(aug_brng_t *aug_state, double scale) {
  return scale * legacy_standard_exponential(aug_state);
}
+extern double legacy_pareto(aug_brng_t *aug_state, double a); +extern double legacy_weibull(aug_brng_t *aug_state, double a); +extern double legacy_chisquare(aug_brng_t *aug_state, double df); +extern double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df, + double nonc); + +extern double legacy_noncentral_f(aug_brng_t *aug_state, double dfnum, + double dfden, double nonc); +extern double legacy_wald(aug_brng_t *aug_state, double mean, double scale); +extern double legacy_lognormal(aug_brng_t *aug_state, double mean, + double sigma); +extern double legacy_standard_t(aug_brng_t *aug_state, double df); +extern int64_t legacy_negative_binomial(aug_brng_t *aug_state, double n, + double p); +extern double legacy_standard_cauchy(aug_brng_t *state); +extern double legacy_beta(aug_brng_t *aug_state, double a, double b); +extern double legacy_f(aug_brng_t *aug_state, double dfnum, double dfden); +extern double legacy_normal(aug_brng_t *aug_state, double loc, double scale); +extern double legacy_standard_gamma(aug_brng_t *aug_state, double shape); +extern double legacy_exponential(aug_brng_t *aug_state, double scale); + +#endif diff --git a/numpy/random/src/mt19937/LICENSE.md b/numpy/random/src/mt19937/LICENSE.md new file mode 100644 index 000000000..f65c3d46e --- /dev/null +++ b/numpy/random/src/mt19937/LICENSE.md @@ -0,0 +1,61 @@ +# MT19937 + +Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + +The rk_random and rk_seed functions algorithms and the original design of +the Mersenne Twister RNG: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. 
Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Original algorithm for the implementation of rk_interval function from +Richard J. Wagner's implementation of the Mersenne Twister RNG, optimised by +Magnus Jonsson. + +Constants used in the rk_double implementation by Isaku Wada. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/numpy/random/src/mt19937/mt19937-benchmark.c b/numpy/random/src/mt19937/mt19937-benchmark.c new file mode 100644 index 000000000..039f8030a --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-benchmark.c @@ -0,0 +1,31 @@ +/* + * cl mt19937-benchmark.c mt19937.c /Ox + * Measure-Command { .\mt19937-benchmark.exe } + * + * gcc mt19937-benchmark.c mt19937.c -O3 -o mt19937-benchmark + * time ./mt19937-benchmark + */ +#include "mt19937.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define Q 1000000000 + +int main() { + int i; + uint32_t seed = 0x0; + uint64_t sum = 0, count = 0; + mt19937_state state; + mt19937_seed(&state, seed); + clock_t begin = clock(); + for (i = 0; i < Q; i++) { + sum += mt19937_next64(&state); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(Q / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/mt19937/mt19937-jump.c b/numpy/random/src/mt19937/mt19937-jump.c new file mode 100644 index 000000000..46b28cf96 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-jump.c @@ -0,0 +1,224 @@ +#include "mt19937-jump.h" +#include "mt19937.h" + +/* 32-bits function */ +/* return the i-th coefficient of the polynomial pf */ +unsigned long get_coef(unsigned long *pf, unsigned int deg) { + if ((pf[deg >> 5] & (LSB << (deg & 0x1ful))) != 0) + return (1); + else + return (0); +} + +/* 32-bit function */ +/* set the coefficient of the polynomial pf with v */ +void set_coef(unsigned long *pf, unsigned int deg, unsigned long v) { + if (v != 0) + pf[deg >> 5] ^= (LSB << (deg & 0x1ful)); + else + ; +} + +void gray_code(unsigned long *h) { + unsigned int i, j = 1, l = 1, term = LL; + + h[0] = 0; + + for (i = 1; i <= QQ; i++) { + l = (l << 1); + term = (term >> 1); + for (; j < l; j++) + h[j] = h[l - j - 1] ^ term; + } +} + 
+void copy_state(mt19937_state *target_state, mt19937_state *state) { + int i; + + for (i = 0; i < N; i++) + target_state->key[i] = state->key[i]; + + target_state->pos = state->pos; +} + +/* next state generating function */ +void gen_next(mt19937_state *state) { + int num; + unsigned long y; + static unsigned long mag02[2] = {0x0ul, MATRIX_A}; + + num = state->pos; + if (num < N - M) { + y = (state->key[num] & UPPER_MASK) | (state->key[num + 1] & LOWER_MASK); + state->key[num] = state->key[num + M] ^ (y >> 1) ^ mag02[y % 2]; + state->pos++; + } else if (num < N - 1) { + y = (state->key[num] & UPPER_MASK) | (state->key[num + 1] & LOWER_MASK); + state->key[num] = state->key[num + (M - N)] ^ (y >> 1) ^ mag02[y % 2]; + state->pos++; + } else if (num == N - 1) { + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ mag02[y % 2]; + state->pos = 0; + } +} + +void add_state(mt19937_state *state1, mt19937_state *state2) { + int i, pt1 = state1->pos, pt2 = state2->pos; + + if (pt2 - pt1 >= 0) { + for (i = 0; i < N - pt2; i++) + state1->key[i + pt1] ^= state2->key[i + pt2]; + for (; i < N - pt1; i++) + state1->key[i + pt1] ^= state2->key[i + (pt2 - N)]; + for (; i < N; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + (pt2 - N)]; + } else { + for (i = 0; i < N - pt1; i++) + state1->key[i + pt1] ^= state2->key[i + pt2]; + for (; i < N - pt2; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + pt2]; + for (; i < N; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + (pt2 - N)]; + } +} + +/* +void gen_vec_h(mt19937_state *state, mt19937_state *vec_h, + unsigned long *h) { + int i; + unsigned long k, g; + mt19937_state v; + + gray_code(h); + + copy_state(&vec_h[0], state); + + for (i = 0; i < QQ; i++) + gen_next(&vec_h[0]); + + for (i = 1; i < LL; i++) { + copy_state(&v, state); + g = h[i] ^ h[i - 1]; + for (k = 1; k < g; k = (k << 1)) + gen_next(&v); + copy_state(&vec_h[h[i]], &vec_h[h[i - 1]]); + 
add_state(&vec_h[h[i]], &v); + } +} +*/ + +/* compute pf(ss) using Sliding window algorithm */ +/* +void calc_state(unsigned long *pf, mt19937_state *state, + mt19937_state *vec_h) { + mt19937_state *temp1; + int i = MEXP - 1, j, digit, skip = 0; + + temp1 = (mt19937_state *)calloc(1, sizeof(mt19937_state)); + + while (get_coef(pf, i) == 0) + i--; + + for (; i >= QQ; i--) { + if (get_coef(pf, i) != 0) { + for (j = 0; j < QQ + 1; j++) + gen_next(temp1); + digit = 0; + for (j = 0; j < QQ; j++) + digit = (digit << 1) ^ get_coef(pf, i - j - 1); + add_state(temp1, &vec_h[digit]); + i -= QQ; + } else + gen_next(temp1); + } + + for (; i > -1; i--) { + gen_next(temp1); + if (get_coef(pf, i) == 1) + add_state(temp1, state); + else + ; + } + + copy_state(state, temp1); + free(temp1); +} +*/ + +/* compute pf(ss) using standard Horner method */ +void horner1(unsigned long *pf, mt19937_state *state) { + int i = MEXP - 1; + mt19937_state *temp; + + temp = (mt19937_state *)calloc(1, sizeof(mt19937_state)); + + while (get_coef(pf, i) == 0) + i--; + + if (i > 0) { + copy_state(temp, state); + gen_next(temp); + i--; + for (; i > 0; i--) { + if (get_coef(pf, i) != 0) + add_state(temp, state); + else + ; + gen_next(temp); + } + if (get_coef(pf, 0) != 0) + add_state(temp, state); + else + ; + } else if (i == 0) + copy_state(temp, state); + else + ; + + copy_state(state, temp); + free(temp); +} + +void mt19937_jump_state(mt19937_state *state, const char *jump_str) { + unsigned long *pf; + int i; + + pf = (unsigned long *)calloc(P_SIZE, sizeof(unsigned long)); + + for (i = MEXP - 1; i > -1; i--) { + if (jump_str[i] == '1') + set_coef(pf, i, 1); + } + /* TODO: Should generate the next set and start from 0, but doesn't matter ?? 
+ */ + if (state->pos >= N) { + state->pos = 0; + } + + horner1(pf, state); + + free(pf); +} +/* +void mt19937_jump(mt19937_state *state, const char *jump_str) +{ + unsigned long h[LL]; + mt19937_state vec_h[LL]; + unsigned long *pf; + int i; + + pf = (unsigned long *)calloc(P_SIZE, sizeof(unsigned long)); + + for (i = MEXP - 1; i > -1; i--) + { + if (jump_str[i] == '1') + set_coef(pf, i, 1); + } + + gen_vec_h(state, &vec_h, &h); + calc_state(pf, state, &vec_h); + + free(pf); +} +*/
#pragma once
#include "mt19937.h"
#include <stdlib.h>

/* parameters for computing Jump */
/* Coefficient bits stored per array word.  get_coef()/set_coef() index with
 * deg >> 5 and deg & 0x1f, so only 32 bits of each unsigned long are used
 * even where unsigned long is wider. */
#define W_SIZE 32
/* Number of polynomial coefficients (degrees 0 .. MEXP - 1) that are read. */
#define MEXP 19937
/* Number of words needed to hold MEXP coefficients at W_SIZE per word. */
#define P_SIZE ((MEXP / W_SIZE) + 1)
/* Mask for bit 0; shifted left to address a single coefficient. */
#define LSB 0x00000001UL
/* Number of doubling rounds in gray_code(), and its table size. */
#define QQ 7
#define LL 128 /* LL = 2^(QQ) */

/*
 * Advance *state by the jump polynomial given in jump_str, a string of
 * '0'/'1' characters in which character i is the coefficient of degree i.
 * Callers should supply at least MEXP characters.  A position at or beyond
 * the end of the key array is reset to 0 before the jump is applied.
 */
void mt19937_jump_state(mt19937_state *state, const char *jump_str);

/*
 * Toggle (XOR) the coefficient of degree `deg` in the packed polynomial pf
 * when v is non-zero; a zero v leaves pf unchanged.
 */
void set_coef(unsigned long *pf, unsigned int deg, unsigned long v);
\ No newline at end of file diff --git a/numpy/random/src/mt19937/mt19937-poly.h b/numpy/random/src/mt19937/mt19937-poly.h new file mode 100644 index 000000000..b03747881 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-poly.h @@ -0,0 +1,207 @@ +static const char * poly = +"0001000111110111011100100010101111000000010100100101000001110111100010101000110100101001011001010" +"1110101101100101011100101101011001110011100011110100001000001011100101100010100000010011101110011" +"0100001001111010000100100101001011100111101101001100000111001000011101100100010000001111110100010" +"0000111101000101000101101111001011000011001001001011010011001000001000011100100010110101111111101" +"0010001001100010011011101111101110111010111000010000011010110011111101100000100100101001010000001" +"1001111000011010011101001101011000111001110010110000011000110101111010110011011000001110110010001" +"1001101011011101000011001011111111100011001010111100000001111011111101000101000011000011111100101" +"0100001111101010101100000110100110010010101011011100110011000101100101011110010101110000101011100" +"0001010100010110100000111001100000011101011001101000001000101101010100010101100000100011110110011" +"0101100110111101010111100010100110100011111011100111000001110110010000000100000110101010111001111" +"0011110010000110101101010001110010100111111111100100101010010011101111011000010111101001110110110" +"1011101101101100110111000100101100111001011111110101001000011111010011000111110011100100001101111" +"1001010110110001000100001001000010000000001011011100101010010100011000110101001000010101100111101" +"0011110101100110111100000111001011011001100101111011000101001011011111110110100010001100101001100" +"1111110011111111110111011011100011000100110011011011011001101011100110010001111100001111100100001" +"1000100011001010100101010100111110001100111111011111100100011110011101101000110100101110010111111" +"1001010110000101001110010110001011011010101111111001110001100100011001000010111001011011000111100" 
+"1101001011110111111010011000110100001010000000101010101001111101111110101111110101110101010010100" +"1100100101010110011111001101100110001011000101010001000110011011111101111110001100000010110110101" +"1111110100001011101011101110111101100001111000011100000110110100100100100101011000111000100110001" +"0110110001001000111110101111000000100100010100100101101111100011010100111101110010000001011111111" +"1101010000011001010101111001111110001111100010100010100001011001110001010010100001011111110110111" +"1100100100001111000111110111000100010101010110100111100001011001101001111101001110010110110011010" +"1000010011000110000000110110110000111010010000111001100010100101010101111100010111000000011101110" +"1100011010110001101100110000001010001100111101101011100111110111000110010011011011001101001111100" +"1011111001100011010110101111100110111101011100000011000010001010001101001011000001111000101000100" +"0110001011001010110000001101100000011000011110010000101000011010011110001101111111010010101100100" +"1111010100000011011001111111011011111001101110101010110111110110101000100001011110111010100111100" +"0000001001111100111111111000100000100100010001011001100001111100100000001111011101100010011000111" +"0011110110100011011001110011100011011000010000000101101101001010111000010000010101111110000000100" +"1011010100001001000011001100011000000111100111100101010100000111000000110111011101011111100010101" +"0011001100110000010101111001000111001001010100011000110010011011101001001100101100000000111000111" +"0111111000010010010100000101010010000100101011111111111001100101101010011010100010111001011100011" +"1001001011010000110000111100010110110100000100110010000010010000001000110010101000110101101100100" +"0001100001100011110110010000100000100010011001010010110111100011011000101011001100001111110110110" +"0001100110010100011001101000100001110011011111101001101011110011011011111110111110101110010011001" 
+"1000000101100000101100100000100000001011000100100001100100101101010111101010111101010001001010110" +"0011111011001101001110110010100100000011001001111010001001100101110000000010111101000111111101010" +"0110101110101110001001110000111110100000101101100110010001111101111011001000101110111010110111110" +"0011001101011010001011000010000111111111101001011100110101011000000001111000101100011101011011100" +"1111101110000000000110001110011001101100111111010001110000111110100011000100001100110010000110111" +"1001011011001111011100000000011011000100000011000010010111000111101000011001001100011010001111000" +"0011110010100010001101011101010011001100000010101001001101111101000111001110110000000010111101001" +"1110110011101110111010011100101001010101100100011111100110001111011111110010100000011100110110001" +"1011100000101000010100011101000010111100101111101100110001010001010000101110000000110100010110011" +"1111110100101010011010100001100110110110011111110010000100001010011110010110001000000100000111000" +"0111001010011001000010111001100110100110110101111011110111001001000101010010010011000111110010101" +"1100110001100101001000010001101010011001110011001110001110010100010000000000000110111001010101000" +"0111111011011101000111011001011011000101110100010001111100101110000100001011111101111101010011001" +"0010001100011011101100010010101011001000001001010101100110001111001110011100110111111010110010001" +"1111111101111001001101101001001010011001110000101000110010111110010110111111000100101000101011010" +"0000101101101100000110101000101000010001111000100000111110011111111110010010001010001111011001100" +"0011110111000000111111000100001111101110100010101011001010110110011001010010001011100001010110101" +"0100000010101101000011001101110010000010110011000101100100000111111100011001110011010011001110000" +"1110011110000000001001001010100000111001010110001110011100011010010010001110010011001010111100000" 
+"1110000101101001011010001001010000111000010011010100001010110000101101110110011000011100111100001" +"1001000011010001110110111001100100001111110010110010011111000010100000001101110100000000101101000" +"0011000000100011000111110001000011100111110110000110101111101100011110100111111000000011011110110" +"1101011010111010010001001101000110110010000010101000000001100100100000001111011001001010110100011" +"1011000010101111010111000001001100111110000010110010011011110011111001000101111011010011010100001" +"0110011111100001011111101010010100110001001001001000100010101011011000011100111000110101110000001" +"1100001111100011110010000101011000010101111010001101010101100001100101100000100100000101011001100" +"0011001000101010101010100111000100100010101000111111101010000000101010101001000101010100100111001" +"1001100001010001100110111101010001111010011100000001001110100010010011110100001000011111100010001" +"0010001000100110101011001110100110101110110110100101111000110101101101001000001110011010110011001" +"0111111101011011101001111001011100001010110111000001100010110110100011010111011000111010100011000" +"1111010110001001010000110001000101101100010100000000100001111100000010111001000011000101010100001" +"0001101100011100010100101110010100000010011011010100000111110110000110101011011010010001110000111" +"0110101000110101110010011100010010100111001101110110010001101001101101010100001010001110111011011" +"1010011001010111101001011000100111001110011000000001101000001111001100001100000011001110100110011" +"0011000110001001010111111111110110111111000111100010010101110000101100101000001010001011010100010" +"1010010100010011101111100111010010010001110101011110110100001000001001000111001110010001001100100" +"1100100010001010011011110100000101101011101010110110100100010001110000111010111001111011111001011" +"0000000000011000100100100111001000101111000000110001011110101111110111100000000100101011000111011" 
+"1011010011101000001011001001110001111010000100101101010111001010001000100001000111011010000110111" +"1010110001001110001100001110011000101100000101100000000110101000000110101100100101110001100100100" +"0110000110101011100001010001010000011101111011111011011000100100101011110101111000001011110010110" +"0111011011100111101010110001111011010011111000010111110100001001010001011001000110111100000101011" +"0010111111010100000110111101001100000100001011101010100011010010000001101100100101001000100011000" +"0101010111111100100000111011101111100000011011111111010001100011001100101101011110101011101100001" +"0100010011101111111011000111111101001000101101111001111000101110010111001010101011010111000000101" +"0110010000010010101111100010111110000000011101001000011111001011111100111100100101100101111010110" +"1010101001110011111100111110100000111100100000111111000010100001111011111110110010001001000000000" +"1110100110010111100101111111001010001111001101100001011000111011100010100001000010100000011001000" +"0000111000110111001001100010111010100111111001111101100101000011001001110011100110101110001101110" +"1110000010110110010110000111001110110000011011100111000101100101000000001110011011001001111001111" +"0000101100001000000111100110110000110111111001101001111111010000001011110011011011100100110000110" +"1001011111101100100111111000000010001110111011010011011101001100000011001010000010101111111010110" +"0001000100101110101101100001001010100110010000110110100110011001000111011110110011001110111110101" +"0000011111011011001111010010101011000010011101001011100001010001111001000110000010000101010011111" +"0110011000001111101001110001101011111111001010010110100001101000000011101000101011101000110101111" +"0000101110011010010000110100000101100011000100101111100011001111011101001010100111001110100001101" +"0000110111011000000110011001101011110000101100110110000101100000110110100001001001110001110001001" 
+"1100110111111100101001100010010110011011110001000111111111001101111110010000011001011010111101001" +"1101111110101110110100101100110001101101001010111101101000000011111111100101000101110001000011001" +"1000111110111011010010101011110110110001010001001001100111111010011101111000000111011000011010011" +"0111010101001110010100101101000110000110001100010101001110101011010100000110110111111111110011110" +"0100011110100011001000110101111010000001011011110101001100111100010100101100010000010110011001111" +"0011011110001110010010100100011111110000110011011100010110110101001110011010101111011001010101011" +"1001001111001000001100100111000001000110110101100111000101011000000100001000100010011000001110011" +"0000111100000111001101011111010000010001100000010101101000111100001000010011110000001011001001100" +"0011011011111011100000111101001011101000010010001001111110010101111010110101101110110111010000101" +"1100011000000000110110100011010100100010001101010101101110110111111011010110011101011010110101011" +"1101000000010010011111000000101000110001000011100001101111010101100000100000100111111111100000000" +"0011100011100101110010111100010111110010101110101000011000111111001110111111000001101101011011111" +"1100110101001000011111001111000000001010001001010101101000001100111010101100010111001001111100000" +"1110101101110001011100011101101100001001001011100111100110011101111000100010010001111100001010010" +"1011001001010100101100010010000110010000101010111111001000011100000000101101110010001101110101001" +"1110000011100101010000011110000010001000001010110001010000100111001100110001111000100100011100110" +"1100010011110111001001100000100111001010000000000011100011111111101110010101111010100010000100001" +"0101101001010111111110000110110010100000001011110100010111110111010000001011110110111000000110010" +"0001100100111110001100010101000010011111100000100010000101110000111001101100100000011111111100010" 
+"1001101101001000001111000100100001010110111011110110001001010001110001001100011001001100000000101" +"1100011110101101011001100001010110001010000111100000011011011001000010101100010101110011001101110" +"0000101011010001010011111001011000010101010100110110111110101000111110001000010100000000100010100" +"1000111111000110110010001111000010101011101101111101011110101111100111111100111101000101000010011" +"0010111010100010011001000000010111100010000101001011001101100011100001001111010100100110101111111" +"1000010011110101001010011111111011101001110100001001100010000100001001100101101111011100100011001" +"1111010001011001111101011110101101000111110101001010011101010010010101001000000000011001100110001" +"0001000010101010101000010100111000001110000111001110001101111111000010101010111001011101001001011" +"0011001111011010101110101111110001001100100111010001011000010100000100000001001100000011000011101" +"1100000110000001011001110000101001010111101000110101000011000000111011100101010000111000010010101" +"1010100101100001011011011110110011000100100101010011111101000000100001001101000011000101010111101" +"1110111111100010111000111000010110111010010110000000000100101001000111101101100000000110111011001" +"0100000000100100011110111011101101101101010110001110100001100001001011000000111111110100011110011" +"0010000010000000010100110011110000000010000011111000111101011110000000000010101101001100000010010" +"1011001001101110110011100001100011101001101011110011010001011101000100011111001010100000011111111" +"1010101100000010001000110000110000101000110100110011100000110010110100011111010001000011100001001" +"1000101000010111111011100010111000111001010100110000000010011011101010101111000110001000110111011" +"1011100001100011010001101011010100110110011100000010111001011111110010100110100010001100000011100" +"0001011001011000101011010000001010011010001011000111000011000011110011111001111010001101011010010" 
+"0010010001001001101000101001011011101110001100010001010100010111111001100100000010001111100010111" +"0100001111001100101001011101010010110010100010001100011010100110000100011010111110001011011001000" +"1001001111011010010011101110100001111100000110101001010111110001101100110010111010111001011111010" +"1110111011111110000001110010000010011111000111011011000011000010011110011111111101100101111011100" +"0101101100000110101110000111111111111010110101010100111000011111011001100000100000101011000101110" +"1011010010100000000100100000010111101110111001000011111011111110100011010010000110001101111101100" +"1100010111001011011001011001010100100110100101001000111011011001100011001010010101111001100100110" +"1000110000111011100101110101101000011001010010100011000001111001110110101101010010110110001100100" +"0100001011101100111001010001111011010110010010110010110111110001001001111001111010010001010001101" +"1110100110101100011110100100110111000111010110011000100100110110001101111100111110100001000110000" +"1110011011001101100101100000001010100011101000010100111011111100011010000110000001011100010000101" +"0100101000010001110010001100010110011111111101111000011001110111011100110010010100100010001000010" +"0100001110010000011000110001101011101001110100100011011001000111010101110100110011010111001100001" +"0100001001101010010111110101110111000000010100111101011010101001000001001000001000101101111000000" +"0110000101110100001111001101110111011110010111101000100101110111010101001101100001110001101101101" +"0010101100100101000100100100110111000111000111100111000001100001000111101011000110111110001010000" +"0100110010001101100011010111000111010111000111110000110000101111101110010110111001011000111010001" +"1011000010010101010010011001000011010110111011010001001010100111001000010110110110101110000110000" +"1110110010011001011011000100011101001001000111011100100000000000100001101101000101000100000111001" 
+"0011100001100110101011011101110111101111000100100011100001010001011001110010101010001110101101110" +"1011001110111111111010101101000010111111011011011100011100101010001011011100011111011100101011000" +"1000110100101000011111010011110000000101101110010000101100001000100000000010010110000000000110011" +"1000000000001111001001000100000111001110111111001111101100001100111000101100011000100111111110011" +"1110010101011010111100110010110001010000101111111101001010100010001001111010111000010000010010001" +"1111111101100100001101011011100001010101000111110111111101011010011111111101000111011001011011000" +"0000101011100011101110110011101111011110011110010000011001111001110111011011111010011011001110111" +"0101100111110100000100010110010010101001010100010111000101111001011011001001110010100011101111110" +"1101011110010101101011010010011111110000011010011101000000010000111010100100111110111000001101010" +"0101100001111001111010101011110001001010000011010110010100011100100100111110100110000010011111001" +"0100010011001001010101110111111010011101101100000101011110111010011110001111110100111011110011010" +"0111001010110101010110000011001010000000101101010101001101011000011011010110101010101111101101100" +"1100100000111101010111011011011110011001100010010000010100101000111111101011100111010101011000111" +"1100110010101100010011111100000110011111101011100100001110001100001010101001001100010011001000100" +"1101101000101101110010000001101001001110101111000110111000011101111110100100110111000000101011110" +"0001100100001010101001101111001000001100000011010000100101100000001110100010010000110110101010111" +"1100010100000110011100101010111110010110111100000010110011011001011110111001010011011110010001110" +"1101110000001011101101011111101011111110110110000111110011101100110100010000100000110100010010110" +"0011000011000110101001110100111010110000100010110101110111100010110001000111100111001011011110010" 
+"0001001110101001101101011010111001001101100011101001011011001110011010001010110100111001111100101" +"1000111001010010000010111010101110001100110111111000011101001000001010010011101000111001100111110" +"1110100100100110010111111101010011101111011011111011011010011110100101100001011000001001001010010" +"1100001000000110110011011101010001011110010001001110110100100001101101001011101010001110111111010" +"1100011100101000011110111110110011111111100010110010110111010010001111101110011011010110000001000" +"0010110100010101110100001000010011100110001110001110010100010010010110011100100110010100001110011" +"1100001011010000001101011011011110100000001110100111001000101000001000001001000010000111010000100" +"0111100000101010110010111010010101100000001100110101001001000110001110111011110001010010010011000" +"1100001111101101100001111000101100110010001000111001101101011110100110100011101000011111011010101" +"0101000011111010010110001001100110110111000100100011011101000010001010110001111001111101110001111" +"0100100000010111010011111110000101001001011110100100010011101110011010100101100001010000001110100" +"0011111101111000100110011000011001100100001010110011111100111010100011110100010101011110011001000" +"0000110000100100001011101110111010001001011110010101111100001111101101111011011110001010000100010" +"1001100100100100110010010101100110000000100000000111110011100111101001010000010000000000101011100" +"0011101011100110000001100101010101011111111011010011110010011011001010011101010010100010001011010" +"1100010011101011010111110100001010100011000011001001011011101111110011001110010001100101011001101" +"0100010001111111100000101000001011010100011100111011010111001100110110001100110101000011010001010" +"1011100001001010011110001010100100001101110011101011100100101010001100110011110010001100100001000" +"0110001001110110010111101011101101010111001010011010101110000010100010000111011000010110011000001" 
+"1000110010100001110001100010010000001101111110000010010110100000000000001111110010001110111100001" +"0100111101000011101110010101011011000101011010111100111111001011110001110011110011011010010111101" +"1010111011101101000001110111001010011001110010100100100100001010001100101010111001110100000110111" +"1010000111000011101101100101101001100000011100100111100110010110011100101000111110111000110111110" +"1101100101011101100111011111111001111000011110111110101100000111000101100100110111000010100101000" +"0110000011011101111101111000110101011000010111010000111011000000100011101010100111001111101010111" +"0001110100001000100001011101001010001110100000101100001011101111100111101011111001111100101101111" +"0101100001110011111110110100110010000011011111101101110110000110110011100110111000111101000010111" +"0111101011100100000000011101111011000100001000111000000111011010101010110000111111101010110001111" +"0000110100111101111011001010101110000011001101001101000010011001101011111110111101010111010011100" +"0101010011001111101111001100101000101000111110111001011111100000001101111011000001001100111111111" +"1010111101000001111011110010001001001110100111110010000011110000011000000101001100011110110011001" +"1010101001000010001010110000010011110101011110010111010001010111101100001001100011101001111101001" +"0110110100111001110011100011111010010010100010111000001100001011010010000100100110101010111001001" +"0110000101011011011100110111111001010000001001011010101010010001011010111100111010101101000101101" +"0100100001011101110111111001111111110110111011000101010000010000011111001000100101100100100110110" +"1100000111110010110011010100000100011111110001110010110001000001001111001101110110110101101010111" +"0000100111101100010001110010110111100011100101100011"; diff --git a/numpy/random/src/mt19937/mt19937-test-data-gen.c b/numpy/random/src/mt19937/mt19937-test-data-gen.c new file mode 100644 index 000000000..4f4ec1d64 --- /dev/null +++ 
b/numpy/random/src/mt19937/mt19937-test-data-gen.c @@ -0,0 +1,59 @@ +/* + * Generate testing csv files + * + * cl mt19937-test-data-gen.c randomkit.c + * -IC:\Anaconda\Lib\site-packages\numpy\core\include -IC:\Anaconda\include + * Advapi32.lib Kernel32.lib C:\Anaconda\libs\python36.lib -DRK_NO_WINCRYPT=1 + * + */ +#include "randomkit.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint32_t seed = 0xDEADBEAF; + int i; + rk_state state; + rk_seed(seed, &state); + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = (uint64_t)rk_random(&state); + } + + FILE *fp; + fp = fopen("mt19937-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx32 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = 0; + rk_seed(seed, &state); + for (i = 0; i < N; i++) { + store[i] = (uint64_t)rk_random(&state); + } + fp = fopen("mt19937-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx32 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/mt19937/mt19937.c b/numpy/random/src/mt19937/mt19937.c new file mode 100644 index 000000000..e5ca9e0cf --- /dev/null +++ b/numpy/random/src/mt19937/mt19937.c @@ -0,0 +1,107 @@ +#include "mt19937.h" +#include "mt19937-jump.h" +#include "mt19937-poly.h" + +void mt19937_seed(mt19937_state *state, uint32_t seed) { + int pos; + seed &= 0xffffffffUL; + + /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ + for (pos = 0; pos < RK_STATE_LEN; pos++) { + state->key[pos] = seed; + seed = (1812433253UL * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffUL; + 
} + state->pos = RK_STATE_LEN; +} + +/* initializes mt[RK_STATE_LEN] with a seed */ +static void init_genrand(mt19937_state *state, uint32_t s) { + int mti; + uint32_t *mt = state->key; + + mt[0] = s & 0xffffffffUL; + for (mti = 1; mti < RK_STATE_LEN; mti++) { + /* + * See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. + * In the previous versions, MSBs of the seed affect + * only MSBs of the array mt[]. + * 2002/01/09 modified by Makoto Matsumoto + */ + mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti); + /* for > 32 bit machines */ + mt[mti] &= 0xffffffffUL; + } + state->pos = mti; + return; +} + +/* + * initialize by an array with array-length + * init_key is the array for initializing keys + * key_length is its length + */ +void mt19937_init_by_array(mt19937_state *state, uint32_t *init_key, + int key_length) { + /* was signed in the original code. RDH 12/16/2002 */ + int i = 1; + int j = 0; + uint32_t *mt = state->key; + int k; + + init_genrand(state, 19650218UL); + k = (RK_STATE_LEN > key_length ? 
RK_STATE_LEN : key_length); + for (; k; k--) { + /* non linear */ + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) + + init_key[j] + j; + /* for > 32 bit machines */ + mt[i] &= 0xffffffffUL; + i++; + j++; + if (i >= RK_STATE_LEN) { + mt[0] = mt[RK_STATE_LEN - 1]; + i = 1; + } + if (j >= key_length) { + j = 0; + } + } + for (k = RK_STATE_LEN - 1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) - + i; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i >= RK_STATE_LEN) { + mt[0] = mt[RK_STATE_LEN - 1]; + i = 1; + } + } + + mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ +} + +void mt19937_gen(mt19937_state *state) { + uint32_t y; + int i; + + for (i = 0; i < N - M; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + M] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + for (; i < N - 1; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + (M - N)] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + + state->pos = 0; +} + +extern inline uint64_t mt19937_next64(mt19937_state *state); + +extern inline uint32_t mt19937_next32(mt19937_state *state); + +extern inline double mt19937_next_double(mt19937_state *state); + +void mt19937_jump(mt19937_state *state) { mt19937_jump_state(state, poly); } diff --git a/numpy/random/src/mt19937/mt19937.h b/numpy/random/src/mt19937/mt19937.h new file mode 100644 index 000000000..8105329ec --- /dev/null +++ b/numpy/random/src/mt19937/mt19937.h @@ -0,0 +1,69 @@ +#pragma once +#include <math.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +#else +#include <stdint.h> +#endif +#else +#include <stdint.h> +#endif + +#ifdef _WIN32 +#define inline __forceinline +#endif + 
+#define RK_STATE_LEN 624 + +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL +#define UPPER_MASK 0x80000000UL +#define LOWER_MASK 0x7fffffffUL + +typedef struct s_mt19937_state { + uint32_t key[RK_STATE_LEN]; + int pos; +} mt19937_state; + +extern void mt19937_seed(mt19937_state *state, uint32_t seed); + +extern void mt19937_gen(mt19937_state *state); + +/* Slightly optimized reference implementation of the Mersenne Twister */ +static inline uint32_t mt19937_next(mt19937_state *state) { + uint32_t y; + + if (state->pos == RK_STATE_LEN) { + // Move to function to help inlining + mt19937_gen(state); + } + y = state->key[state->pos++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +extern void mt19937_init_by_array(mt19937_state *state, uint32_t *init_key, + int key_length); + +static inline uint64_t mt19937_next64(mt19937_state *state) { + return (uint64_t)mt19937_next(state) << 32 | mt19937_next(state); +} + +static inline uint32_t mt19937_next32(mt19937_state *state) { + return mt19937_next(state); +} + +static inline double mt19937_next_double(mt19937_state *state) { + int32_t a = mt19937_next(state) >> 5, b = mt19937_next(state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void mt19937_jump(mt19937_state *state); diff --git a/numpy/random/src/mt19937/randomkit.c b/numpy/random/src/mt19937/randomkit.c new file mode 100644 index 000000000..f8ed4b49e --- /dev/null +++ b/numpy/random/src/mt19937/randomkit.c @@ -0,0 +1,578 @@ +/* Random kit 1.3 */ + +/* + * Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + * + * The rk_random and rk_seed functions algorithms and the original design of + * the Mersenne Twister RNG: + * + * Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Original algorithm for the implementation of rk_interval function from + * Richard J. Wagner's implementation of the Mersenne Twister RNG, optimised by + * Magnus Jonsson. + * + * Constants used in the rk_double implementation by Isaku Wada. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* static char const rcsid[] = + "@(#) $Jeannot: randomkit.c,v 1.28 2005/07/21 22:14:09 js Exp $"; */ + +#ifdef _WIN32 +/* + * Windows + * XXX: we have to use this ugly defined(__GNUC__) because it is not easy to + * detect the compiler used in distutils itself + */ +#if (defined(__GNUC__) && defined(NPY_NEEDS_MINGW_TIME_WORKAROUND)) + +/* + * FIXME: ideally, we should set this to the real version of MSVCRT. 
We need + * something higher than 0x601 to enable _ftime64 and co + */ +#define __MSVCRT_VERSION__ 0x0700 +#include <sys/timeb.h> +#include <time.h> + +/* + * mingw msvcr lib import wrongly export _ftime, which does not exist in the + * actual msvc runtime for version >= 8; we make it an alias to _ftime64, which + * is available in those versions of the runtime + */ +#define _FTIME(x) _ftime64((x)) +#else +#include <sys/timeb.h> +#include <time.h> + +#define _FTIME(x) _ftime((x)) +#endif + +#ifndef RK_NO_WINCRYPT +/* Windows crypto */ +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0400 +#endif +#include <wincrypt.h> +#include <windows.h> + +#endif + +/* + * Do not move this include. randomkit.h must be included + * after windows timeb.h is included. + */ +#include "randomkit.h" + +#else +/* Unix */ +#include "randomkit.h" +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#endif + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <math.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> + +#ifndef RK_DEV_URANDOM +#define RK_DEV_URANDOM "/dev/urandom" +#endif + +#ifndef RK_DEV_RANDOM +#define RK_DEV_RANDOM "/dev/random" +#endif + +char *rk_strerror[RK_ERR_MAX] = {"no error", "random device unvavailable"}; + +/* static functions */ +static unsigned long rk_hash(unsigned long key); + +void rk_seed(unsigned long seed, rk_state *state) { + int pos; + seed &= 0xffffffffUL; + + /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ + for (pos = 0; pos < RK_STATE_LEN; pos++) { + state->key[pos] = seed; + seed = (1812433253UL * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffUL; + } + state->pos = RK_STATE_LEN; + state->gauss = 0; + state->has_gauss = 0; + state->has_binomial = 0; +} + +/* Thomas Wang 32 bits integer hash function */ +unsigned long rk_hash(unsigned long key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + 
return key; +} + +rk_error rk_randomseed(rk_state *state) { +#ifndef _WIN32 + struct timeval tv; +#else + struct _timeb tv; +#endif + int i; + + if (rk_devfill(state->key, sizeof(state->key), 0) == RK_NOERR) { + /* ensures non-zero key */ + state->key[0] |= 0x80000000UL; + state->pos = RK_STATE_LEN; + state->gauss = 0; + state->has_gauss = 0; + state->has_binomial = 0; + + for (i = 0; i < 624; i++) { + state->key[i] &= 0xffffffffUL; + } + return RK_NOERR; + } + +#ifndef _WIN32 + gettimeofday(&tv, NULL); + rk_seed(rk_hash(getpid()) ^ rk_hash(tv.tv_sec) ^ rk_hash(tv.tv_usec) ^ + rk_hash(clock()), + state); +#else + _FTIME(&tv); + rk_seed(rk_hash(tv.time) ^ rk_hash(tv.millitm) ^ rk_hash(clock()), state); +#endif + + return RK_ENODEV; +} + +/* Magic Mersenne Twister constants */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL +#define UPPER_MASK 0x80000000UL +#define LOWER_MASK 0x7fffffffUL + +/* + * Slightly optimised reference implementation of the Mersenne Twister + * Note that regardless of the precision of long, only 32 bit random + * integers are produced + */ +unsigned long rk_random(rk_state *state) { + unsigned long y; + + if (state->pos == RK_STATE_LEN) { + int i; + + for (i = 0; i < N - M; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + M] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + for (; i < N - 1; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = + state->key[i + (M - N)] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + + state->pos = 0; + } + y = state->key[state->pos++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* + * Returns an unsigned 64 bit random integer. 
+ */ +NPY_INLINE static npy_uint64 rk_uint64(rk_state *state) { + npy_uint64 upper = (npy_uint64)rk_random(state) << 32; + npy_uint64 lower = (npy_uint64)rk_random(state); + return upper | lower; +} + +/* + * Returns an unsigned 32 bit random integer. + */ +NPY_INLINE static npy_uint32 rk_uint32(rk_state *state) { + return (npy_uint32)rk_random(state); +} + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint64(npy_uint64 off, npy_uint64 rng, npy_intp cnt, + npy_uint64 *out, rk_state *state) { + npy_uint64 val, mask = rng; + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + + for (i = 0; i < cnt; i++) { + if (rng <= 0xffffffffUL) { + while ((val = (rk_uint32(state) & mask)) > rng) + ; + } else { + while ((val = (rk_uint64(state) & mask)) > rng) + ; + } + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint32(npy_uint32 off, npy_uint32 rng, npy_intp cnt, + npy_uint32 *out, rk_state *state) { + npy_uint32 val, mask = rng; + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + + for (i = 0; i < cnt; i++) { + while ((val = (rk_uint32(state) & mask)) > rng) + ; + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. 
+ */ +void rk_random_uint16(npy_uint16 off, npy_uint16 rng, npy_intp cnt, + npy_uint16 *out, rk_state *state) { + npy_uint16 val, mask = rng; + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + + for (i = 0; i < cnt; i++) { + do { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 1; + } else { + buf >>= 16; + bcnt--; + } + val = (npy_uint16)buf & mask; + } while (val > rng); + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint8(npy_uint8 off, npy_uint8 rng, npy_intp cnt, npy_uint8 *out, + rk_state *state) { + npy_uint8 val, mask = rng; + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + + for (i = 0; i < cnt; i++) { + do { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 3; + } else { + buf >>= 8; + bcnt--; + } + val = (npy_uint8)buf & mask; + } while (val > rng); + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. 
+ */ +void rk_random_bool(npy_bool off, npy_bool rng, npy_intp cnt, npy_bool *out, + rk_state *state) { + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* If we reach here rng and mask are one and off is zero */ + assert(rng == 1 && off == 0); + for (i = 0; i < cnt; i++) { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 31; + } else { + buf >>= 1; + bcnt--; + } + out[i] = (buf & 0x00000001) != 0; + } +} + +long rk_long(rk_state *state) { return rk_ulong(state) >> 1; } + +unsigned long rk_ulong(rk_state *state) { +#if ULONG_MAX <= 0xffffffffUL + return rk_random(state); +#else + return (rk_random(state) << 32) | (rk_random(state)); +#endif +} + +unsigned long rk_interval(unsigned long max, rk_state *state) { + unsigned long mask = max, value; + + if (max == 0) { + return 0; + } + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; +#if ULONG_MAX > 0xffffffffUL + mask |= mask >> 32; +#endif + + /* Search a random value in [0..mask] <= max */ +#if ULONG_MAX > 0xffffffffUL + if (max <= 0xffffffffUL) { + while ((value = (rk_random(state) & mask)) > max) + ; + } else { + while ((value = (rk_ulong(state) & mask)) > max) + ; + } +#else + while ((value = (rk_ulong(state) & mask)) > max) + ; +#endif + return value; +} + +double rk_double(rk_state *state) { + /* shifts : 67108864 = 0x4000000, 9007199254740992 = 0x20000000000000 */ + long a = rk_random(state) >> 5, b = rk_random(state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void rk_fill(void *buffer, size_t size, rk_state *state) { + unsigned long r; + unsigned char *buf = buffer; + + for (; size >= 4; size -= 4) { + r = rk_random(state); + *(buf++) = r & 0xFF; + *(buf++) = (r >> 8) & 0xFF; + *(buf++) = (r >> 16) & 0xFF; + *(buf++) = (r >> 24) & 0xFF; + } + + if (!size) { + return; + } + r = rk_random(state); + for (; size; r >>= 
8, size--) { + *(buf++) = (unsigned char)(r & 0xFF); + } +} + +rk_error rk_devfill(void *buffer, size_t size, int strong) { +#ifndef _WIN32 + FILE *rfile; + int done; + + if (strong) { + rfile = fopen(RK_DEV_RANDOM, "rb"); + } else { + rfile = fopen(RK_DEV_URANDOM, "rb"); + } + if (rfile == NULL) { + return RK_ENODEV; + } + done = fread(buffer, size, 1, rfile); + fclose(rfile); + if (done) { + return RK_NOERR; + } +#else + +#ifndef RK_NO_WINCRYPT + HCRYPTPROV hCryptProv; + BOOL done; + + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) || + !hCryptProv) { + return RK_ENODEV; + } + done = CryptGenRandom(hCryptProv, size, (unsigned char *)buffer); + CryptReleaseContext(hCryptProv, 0); + if (done) { + return RK_NOERR; + } +#endif + +#endif + return RK_ENODEV; +} + +rk_error rk_altfill(void *buffer, size_t size, int strong, rk_state *state) { + rk_error err; + + err = rk_devfill(buffer, size, strong); + if (err) { + rk_fill(buffer, size, state); + } + return err; +} + +double rk_gauss(rk_state *state) { + if (state->has_gauss) { + const double tmp = state->gauss; + state->gauss = 0; + state->has_gauss = 0; + return tmp; + } else { + double f, x1, x2, r2; + + do { + x1 = 2.0 * rk_double(state) - 1.0; + x2 = 2.0 * rk_double(state) - 1.0; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. 
*/ + f = sqrt(-2.0 * log(r2) / r2); + /* Keep for next call */ + state->gauss = f * x1; + state->has_gauss = 1; + return f * x2; + } +} diff --git a/numpy/random/src/mt19937/randomkit.h b/numpy/random/src/mt19937/randomkit.h new file mode 100644 index 000000000..abb082cb2 --- /dev/null +++ b/numpy/random/src/mt19937/randomkit.h @@ -0,0 +1,223 @@ +/* Random kit 1.3 */ + +/* + * Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* @(#) $Jeannot: randomkit.h,v 1.24 2005/07/21 22:14:09 js Exp $ */ + +/* + * Typical use: + * + * { + * rk_state state; + * unsigned long seed = 1, random_value; + * + * rk_seed(seed, &state); // Initialize the RNG + * ... 
+ * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] + * } + * + * Instead of rk_seed, you can use rk_randomseed which will get a random seed + * from /dev/urandom (or the clock, if /dev/urandom is unavailable): + * + * { + * rk_state state; + * unsigned long random_value; + * + * rk_randomseed(&state); // Initialize the RNG with a random seed + * ... + * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] + * } + */ + +/* + * Useful macros: + * RK_DEV_URANDOM: the device used for random seeding. + * defaults to "/dev/urandom" + * RK_DEV_RANDOM: the device used when strong random bytes are requested. + * defaults to "/dev/random" + */ + +#ifndef _RANDOMKIT_ +#define _RANDOMKIT_ + +#include <numpy/npy_common.h> +#include <stddef.h> + +#define RK_STATE_LEN 624 + +typedef struct rk_state_ { + unsigned long key[RK_STATE_LEN]; + int pos; + int has_gauss; /* !=0: gauss contains a gaussian deviate */ + double gauss; + + /* The rk_state structure has been extended to store the following + * information for the binomial generator. If the input values of n or p + * are different than nsave and psave, then the other parameters will be + * recomputed. RTK 2005-09-02 */ + + int has_binomial; /* !=0: following parameters initialized for + binomial */ + double psave; + long nsave; + double r; + double q; + double fm; + long m; + double p1; + double xm; + double xl; + double xr; + double c; + double laml; + double lamr; + double p2; + double p3; + double p4; + +} rk_state; + +typedef enum { + RK_NOERR = 0, /* no error */ + RK_ENODEV = 1, /* no RK_DEV_RANDOM device */ + RK_ERR_MAX = 2 +} rk_error; + +/* error strings */ +extern char *rk_strerror[RK_ERR_MAX]; + +/* Maximum generated random value */ +#define RK_MAX 0xFFFFFFFFUL + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Initialize the RNG state using the given seed. + */ +extern void rk_seed(unsigned long seed, rk_state *state); + +/* + * Initialize the RNG state using a random seed. + * Uses RK_DEV_URANDOM or, when unavailable, the clock (see randomkit.c). 
+ * Returns RK_NOERR when no error occurs. + * Returns RK_ENODEV when the use of RK_DEV_URANDOM failed (for example because + * there is no such device). In this case, the RNG was initialized using the + * clock. + */ +extern rk_error rk_randomseed(rk_state *state); + +/* + * Returns a random unsigned long between 0 and RK_MAX inclusive + */ +extern unsigned long rk_random(rk_state *state); + +/* + * Returns a random long between 0 and LONG_MAX inclusive + */ +extern long rk_long(rk_state *state); + +/* + * Returns a random unsigned long between 0 and ULONG_MAX inclusive + */ +extern unsigned long rk_ulong(rk_state *state); + +/* + * Returns a random unsigned long between 0 and max inclusive. + */ +extern unsigned long rk_interval(unsigned long max, rk_state *state); + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint64(npy_uint64 off, npy_uint64 rng, npy_intp cnt, + npy_uint64 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint32(npy_uint32 off, npy_uint32 rng, npy_intp cnt, + npy_uint32 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint16(npy_uint16 off, npy_uint16 rng, npy_intp cnt, + npy_uint16 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint8(npy_uint8 off, npy_uint8 rng, npy_intp cnt, + npy_uint8 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. It is assumed that npy_bool has the same size as npy_uint8. 
+ */ +extern void rk_random_bool(npy_bool off, npy_bool rng, npy_intp cnt, + npy_bool *out, rk_state *state); + +/* + * Returns a random double between 0.0 and 1.0, 1.0 excluded. + */ +extern double rk_double(rk_state *state); + +/* + * fill the buffer with size random bytes + */ +extern void rk_fill(void *buffer, size_t size, rk_state *state); + +/* + * fill the buffer with random bytes from the random device + * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is + * On Unix, if strong is nonzero, RK_DEV_RANDOM is used. If not, RK_DEV_URANDOM + * is used instead. This parameter has no effect on Windows. + * Warning: on most unixes RK_DEV_RANDOM will wait for enough entropy to answer + * which can take a very long time on quiet systems. + */ +extern rk_error rk_devfill(void *buffer, size_t size, int strong); + +/* + * fill the buffer using rk_devfill if the random device is available and using + * rk_fill if it is not + * parameters have the same meaning as rk_fill and rk_devfill + * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is + */ +extern rk_error rk_altfill(void *buffer, size_t size, int strong, + rk_state *state); + +/* + * return a random gaussian deviate with variance unity and zero mean. + */ +extern double rk_gauss(rk_state *state); + +#ifdef __cplusplus +} +#endif + +#endif /* _RANDOMKIT_ */ diff --git a/numpy/random/src/pcg32/LICENSE.md b/numpy/random/src/pcg32/LICENSE.md new file mode 100644 index 000000000..3db2ac2e8 --- /dev/null +++ b/numpy/random/src/pcg32/LICENSE.md @@ -0,0 +1,22 @@ +# PCG32 + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/pcg32/pcg-advance-64.c b/numpy/random/src/pcg32/pcg-advance-64.c new file mode 100644 index 000000000..8210e7565 --- /dev/null +++ b/numpy/random/src/pcg32/pcg-advance-64.c @@ -0,0 +1,62 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Repetitive C code is derived using C preprocessor metaprogramming + * techniques. 
+ */ + +#include "pcg_variants.h" + +/* Multi-step advance functions (jump-ahead, jump-back) + * + * The method used here is based on Brown, "Random Number Generation + * with Arbitrary Stride,", Transactions of the American Nuclear + * Society (Nov. 1994). The algorithm is very similar to fast + * exponentiation. + * + * Even though delta is an unsigned integer, we can pass a + * signed integer to go backwards, it just goes "the long way round". + */ + +uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, uint64_t cur_mult, + uint64_t cur_plus) +{ + uint64_t acc_mult = 1u; + uint64_t acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + diff --git a/numpy/random/src/pcg32/pcg32-test-data-gen.c b/numpy/random/src/pcg32/pcg32-test-data-gen.c new file mode 100644 index 000000000..cccaf84b9 --- /dev/null +++ b/numpy/random/src/pcg32/pcg32-test-data-gen.c @@ -0,0 +1,59 @@ +/* + * Generate testing csv files + * + * + * gcc pcg32-test-data-gen.c pcg32.orig.c ../splitmix64/splitmix64.c -o + * pgc64-test-data-gen + */ + +#include "pcg_variants.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + pcg32_random_t rng; + uint64_t inc, seed = 0xDEADBEAF; + inc = 0; + int i; + uint64_t store[N]; + pcg32_srandom_r(&rng, seed, inc); + for (i = 0; i < N; i++) { + store[i] = pcg32_random_r(&rng); + } + + FILE *fp; + fp = fopen("pcg32-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = 0; + pcg32_srandom_r(&rng, seed, inc); + for (i = 0; i < N; i++) { + store[i] = pcg32_random_r(&rng); + } + fp 
= fopen("pcg32-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/pcg32/pcg32.c b/numpy/random/src/pcg32/pcg32.c new file mode 100644 index 000000000..5fbf6759f --- /dev/null +++ b/numpy/random/src/pcg32/pcg32.c @@ -0,0 +1,30 @@ +#include "pcg32.h" + +extern inline uint64_t pcg32_next64(pcg32_state *state); +extern inline uint32_t pcg32_next32(pcg32_state *state); +extern inline double pcg32_next_double(pcg32_state *state); + +uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, uint64_t cur_mult, + uint64_t cur_plus) { + uint64_t acc_mult, acc_plus; + acc_mult = 1u; + acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + +extern void pcg32_advance_state(pcg32_state *state, uint64_t step) { + pcg32_advance_r(state->pcg_state, step); +} + +extern void pcg32_set_seed(pcg32_state *state, uint64_t seed, uint64_t inc) { + pcg32_srandom_r(state->pcg_state, seed, inc); +} diff --git a/numpy/random/src/pcg32/pcg32.h b/numpy/random/src/pcg32/pcg32.h new file mode 100644 index 000000000..557113d8f --- /dev/null +++ b/numpy/random/src/pcg32/pcg32.h @@ -0,0 +1,89 @@ +#ifndef _RANDOMDGEN__PCG32_H_ +#define _RANDOMDGEN__PCG32_H_ + +#ifdef _WIN32 +#ifndef _INTTYPES +#include "../common/stdint.h" +#endif +#define inline __inline __forceinline +#else +#include <inttypes.h> +#endif + +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +static inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) { +#if 
PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorl %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 31)); +#endif +} + +static inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +static inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) { + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +static inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate; + oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +static inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64 *rng, + uint64_t initstate, + uint64_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} + +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +static inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64 *rng, + uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + rng->inc); +} + +typedef struct pcg_state_setseq_64 pcg32_random_t; +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32_advance_r pcg_setseq_64_advance_r + +typedef struct s_pcg32_state { pcg32_random_t *pcg_state; } pcg32_state; + +static inline uint64_t pcg32_next64(pcg32_state *state) { + return (uint64_t)(pcg32_random_r(state->pcg_state)) << 32 | + pcg32_random_r(state->pcg_state); +} + +static inline uint32_t pcg32_next32(pcg32_state *state) { + return pcg32_random_r(state->pcg_state); +} + +static inline double pcg32_next_double(pcg32_state *state) { + int32_t a = pcg32_random_r(state->pcg_state) >> 5, + b = pcg32_random_r(state->pcg_state) >> 6; + 
return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void pcg32_advance_state(pcg32_state *state, uint64_t step); +void pcg32_set_seed(pcg32_state *state, uint64_t seed, uint64_t inc); + +#endif diff --git a/numpy/random/src/pcg32/pcg_variants.h b/numpy/random/src/pcg32/pcg_variants.h new file mode 100644 index 000000000..32daac1ce --- /dev/null +++ b/numpy/random/src/pcg32/pcg_variants.h @@ -0,0 +1,2210 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Much of the derivation was performed mechanically. In particular, the + * output functions were generated by compiling the C++ output functions + * into LLVM bitcode and then transforming that using the LLVM C backend + * (from https://github.com/draperlaboratory/llvm-cbe), and then + * postprocessing and hand editing the output. + * + * Much of the remaining code was generated by C-preprocessor metaprogramming. 
+ */ + +#ifndef PCG_VARIANTS_H_INCLUDED +#define PCG_VARIANTS_H_INCLUDED 1 + +#include <inttypes.h> + +#if __SIZEOF_INT128__ + typedef __uint128_t pcg128_t; + #define PCG_128BIT_CONSTANT(high,low) \ + ((((pcg128_t)high) << 64) + low) + #define PCG_HAS_128BIT_OPS 1 +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) + #error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. + // We could instead use macros PCG_INLINE and PCG_EXTERN_INLINE + // but better to just reject ancient C code. +#endif + +#if __cplusplus +extern "C" { +#endif + +/* + * Rotate helper functions. + */ + +inline uint8_t pcg_rotr_8(uint8_t value, unsigned int rot) +{ +/* Unfortunately, clang is kinda pathetic when it comes to properly + * recognizing idiomatic rotate code, so for clang we actually provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + */ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 7)); +#endif +} + +inline uint16_t pcg_rotr_16(uint16_t value, unsigned int rot) +{ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 15)); +#endif +} + +inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) +{ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 31)); +#endif +} + +inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) +{ +#if 0 && PCG_USE_INLINE_ASM && __clang__ && __x86_64__ + // For whatever reason, clang actually *does* generate rotq by + // itself, so we don't need this code. 
+ asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 63)); +#endif +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_rotr_128(pcg128_t value, unsigned int rot) +{ + return (value >> rot) | (value << ((- rot) & 127)); +} +#endif + +/* + * Output functions. These are the core of the PCG generation scheme. + */ + +// XSH RS + +inline uint8_t pcg_output_xsh_rs_16_8(uint16_t state) +{ + return (uint8_t)(((state >> 7u) ^ state) >> ((state >> 14u) + 3u)); +} + +inline uint16_t pcg_output_xsh_rs_32_16(uint32_t state) +{ + return (uint16_t)(((state >> 11u) ^ state) >> ((state >> 30u) + 11u)); +} + +inline uint32_t pcg_output_xsh_rs_64_32(uint64_t state) +{ + + return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rs_128_64(pcg128_t state) +{ + return (uint64_t)(((state >> 43u) ^ state) >> ((state >> 124u) + 45u)); +} +#endif + +// XSH RR + +inline uint8_t pcg_output_xsh_rr_16_8(uint16_t state) +{ + return pcg_rotr_8(((state >> 5u) ^ state) >> 5u, state >> 13u); +} + +inline uint16_t pcg_output_xsh_rr_32_16(uint32_t state) +{ + return pcg_rotr_16(((state >> 10u) ^ state) >> 12u, state >> 28u); +} + +inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) +{ + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rr_128_64(pcg128_t state) +{ + return pcg_rotr_64(((state >> 29u) ^ state) >> 58u, state >> 122u); +} +#endif + +// RXS M XS + +inline uint8_t pcg_output_rxs_m_xs_8_8(uint8_t state) +{ + uint8_t word = ((state >> ((state >> 6u) + 2u)) ^ state) * 217u; + return (word >> 6u) ^ word; +} + +inline uint16_t pcg_output_rxs_m_xs_16_16(uint16_t state) +{ + uint16_t word = ((state >> ((state >> 13u) + 3u)) ^ state) * 62169u; + return (word >> 11u) ^ word; +} + +inline uint32_t pcg_output_rxs_m_xs_32_32(uint32_t state) +{ + uint32_t word = ((state 
>> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +inline uint64_t pcg_output_rxs_m_xs_64_64(uint64_t state) +{ + uint64_t word = ((state >> ((state >> 59u) + 5u)) ^ state) + * 12605985483714917081ull; + return (word >> 43u) ^ word; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_rxs_m_xs_128_128(pcg128_t state) +{ + pcg128_t word = ((state >> ((state >> 122u) + 6u)) ^ state) + * (PCG_128BIT_CONSTANT(17766728186571221404ULL, + 12605985483714917081ULL)); + // 327738287884841127335028083622016905945 + return (word >> 86u) ^ word; +} +#endif + +// XSL RR (only defined for >= 64 bits) + +inline uint32_t pcg_output_xsl_rr_64_32(uint64_t state) +{ + return pcg_rotr_32(((uint32_t)(state >> 32u)) ^ (uint32_t)state, + state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) +{ + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} +#endif + +// XSL RR RR (only defined for >= 64 bits) + +inline uint64_t pcg_output_xsl_rr_rr_64_64(uint64_t state) +{ + uint32_t rot1 = (uint32_t)(state >> 59u); + uint32_t high = (uint32_t)(state >> 32u); + uint32_t low = (uint32_t)state; + uint32_t xored = high ^ low; + uint32_t newlow = pcg_rotr_32(xored, rot1); + uint32_t newhigh = pcg_rotr_32(high, newlow & 31u); + return (((uint64_t)newhigh) << 32u) | newlow; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_xsl_rr_rr_128_128(pcg128_t state) +{ + uint32_t rot1 = (uint32_t)(state >> 122u); + uint64_t high = (uint64_t)(state >> 64u); + uint64_t low = (uint64_t)state; + uint64_t xored = high ^ low; + uint64_t newlow = pcg_rotr_64(xored, rot1); + uint64_t newhigh = pcg_rotr_64(high, newlow & 63u); + return (((pcg128_t)newhigh) << 64u) | newlow; +} +#endif + +#define PCG_DEFAULT_MULTIPLIER_8 141U +#define PCG_DEFAULT_MULTIPLIER_16 12829U +#define PCG_DEFAULT_MULTIPLIER_32 747796405U +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +#define 
PCG_DEFAULT_INCREMENT_8 77U +#define PCG_DEFAULT_INCREMENT_16 47989U +#define PCG_DEFAULT_INCREMENT_32 2891336453U +#define PCG_DEFAULT_INCREMENT_64 1442695040888963407ULL + +#if PCG_HAS_128BIT_OPS +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL) +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG_STATE_ONESEQ_8_INITIALIZER { 0xd7U } +#define PCG_STATE_ONESEQ_16_INITIALIZER { 0x20dfU } +#define PCG_STATE_ONESEQ_32_INITIALIZER { 0x46b56677U } +#define PCG_STATE_ONESEQ_64_INITIALIZER { 0x4d595df4d0f33173ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_ONESEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0xb8dc10e158a92392ULL, 0x98046df007ec0a53ULL) } +#endif + +#define PCG_STATE_UNIQUE_8_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG_STATE_UNIQUE_16_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG_STATE_UNIQUE_32_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG_STATE_UNIQUE_64_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_UNIQUE_128_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG_STATE_MCG_8_INITIALIZER { 0xe5U } +#define PCG_STATE_MCG_16_INITIALIZER { 0xa5e5U } +#define PCG_STATE_MCG_32_INITIALIZER { 0xd15ea5e5U } +#define PCG_STATE_MCG_64_INITIALIZER { 0xcafef00dd15ea5e5ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_MCG_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x0000000000000000ULL, 0xcafef00dd15ea5e5ULL) } +#endif + +#define PCG_STATE_SETSEQ_8_INITIALIZER { 0x9bU, 0xdbU } +#define PCG_STATE_SETSEQ_16_INITIALIZER { 0xe39bU, 0x5bdbU } +#define PCG_STATE_SETSEQ_32_INITIALIZER { 0xec02d89bU, 0x94b95bdbU } +#define PCG_STATE_SETSEQ_64_INITIALIZER \ + { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } +#if PCG_HAS_128BIT_OPS +#define 
PCG_STATE_SETSEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL), \ + PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) } +#endif + +/* Representations for the oneseq, mcg, and unique variants */ + +struct pcg_state_8 { + uint8_t state; +}; + +struct pcg_state_16 { + uint16_t state; +}; + +struct pcg_state_32 { + uint32_t state; +}; + +struct pcg_state_64 { + uint64_t state; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_128 { + pcg128_t state; +}; +#endif + +/* Representations setseq variants */ + +struct pcg_state_setseq_8 { + uint8_t state; + uint8_t inc; +}; + +struct pcg_state_setseq_16 { + uint16_t state; + uint16_t inc; +}; + +struct pcg_state_setseq_32 { + uint32_t state; + uint32_t inc; +}; + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_setseq_128 { + pcg128_t state; + pcg128_t inc; +}; +#endif + +/* Multi-step advance functions (jump-ahead, jump-back) */ + +extern uint8_t pcg_advance_lcg_8(uint8_t state, uint8_t delta, uint8_t cur_mult, + uint8_t cur_plus); +extern uint16_t pcg_advance_lcg_16(uint16_t state, uint16_t delta, + uint16_t cur_mult, uint16_t cur_plus); +extern uint32_t pcg_advance_lcg_32(uint32_t state, uint32_t delta, + uint32_t cur_mult, uint32_t cur_plus); +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +#if PCG_HAS_128BIT_OPS +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); +#endif + +/* Functions to advance the underlying LCG, one version for each size and + * each style. These functions are considered semi-private. There is rarely + * a good reason to call them directly. 
+ */ + +inline void pcg_oneseq_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + + PCG_DEFAULT_INCREMENT_8; +} + +inline void pcg_oneseq_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + PCG_DEFAULT_INCREMENT_8); +} + +inline void pcg_mcg_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8; +} + +inline void pcg_mcg_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state + = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, 0u); +} + +inline void pcg_unique_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + + (uint8_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + (uint8_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_8_step_r(struct pcg_state_setseq_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + rng->inc; +} + +inline void pcg_setseq_8_advance_r(struct pcg_state_setseq_8* rng, + uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + rng->inc); +} + +inline void pcg_oneseq_16_step_r(struct pcg_state_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + + PCG_DEFAULT_INCREMENT_16; +} + +inline void pcg_oneseq_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state = pcg_advance_lcg_16( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, PCG_DEFAULT_INCREMENT_16); +} + +inline void pcg_mcg_16_step_r(struct pcg_state_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16; +} + +inline void pcg_mcg_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state + = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, 0u); +} + +inline void pcg_unique_16_step_r(struct pcg_state_16* 
rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + + (uint16_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state + = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + (uint16_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_16_step_r(struct pcg_state_setseq_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + rng->inc; +} + +inline void pcg_setseq_16_advance_r(struct pcg_state_setseq_16* rng, + uint16_t delta) +{ + rng->state = pcg_advance_lcg_16(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_16, rng->inc); +} + +inline void pcg_oneseq_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + + PCG_DEFAULT_INCREMENT_32; +} + +inline void pcg_oneseq_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state = pcg_advance_lcg_32( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, PCG_DEFAULT_INCREMENT_32); +} + +inline void pcg_mcg_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32; +} + +inline void pcg_mcg_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state + = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, 0u); +} + +inline void pcg_unique_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + + (uint32_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state + = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + (uint32_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_32_step_r(struct pcg_state_setseq_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + rng->inc; +} + +inline void pcg_setseq_32_advance_r(struct pcg_state_setseq_32* rng, + uint32_t delta) +{ + rng->state = pcg_advance_lcg_32(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_32, rng->inc); +} + +inline void 
pcg_oneseq_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + + PCG_DEFAULT_INCREMENT_64; +} + +inline void pcg_oneseq_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state = pcg_advance_lcg_64( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, PCG_DEFAULT_INCREMENT_64); +} + +inline void pcg_mcg_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64; +} + +inline void pcg_mcg_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state + = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, 0u); +} + +inline void pcg_unique_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + + (uint64_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state + = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + (uint64_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64* rng, + uint64_t delta) +{ + rng->state = pcg_advance_lcg_64(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_64, rng->inc); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + PCG_DEFAULT_INCREMENT_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_advance_r(struct pcg_state_128* rng, pcg128_t delta) +{ + rng->state + = pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + PCG_DEFAULT_INCREMENT_128); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_advance_r(struct pcg_state_128* rng, pcg128_t 
delta) +{ + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, 0u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + (pcg128_t)(((intptr_t)rng) | 1u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_advance_r(struct pcg_state_128* rng, pcg128_t delta) +{ + rng->state + = pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + (pcg128_t)(((intptr_t)rng) | 1u)); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_step_r(struct pcg_state_setseq_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_advance_r(struct pcg_state_setseq_128* rng, + pcg128_t delta) +{ + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} +#endif + +/* Functions to seed the RNG state, one version for each size and each + * style. Unlike the step functions, regular users can and should call + * these functions. 
+ */ + +inline void pcg_oneseq_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = 0U; + pcg_oneseq_8_step_r(rng); + rng->state += initstate; + pcg_oneseq_8_step_r(rng); +} + +inline void pcg_mcg_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = 0U; + pcg_unique_8_step_r(rng); + rng->state += initstate; + pcg_unique_8_step_r(rng); +} + +inline void pcg_setseq_8_srandom_r(struct pcg_state_setseq_8* rng, + uint8_t initstate, uint8_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_8_step_r(rng); + rng->state += initstate; + pcg_setseq_8_step_r(rng); +} + +inline void pcg_oneseq_16_srandom_r(struct pcg_state_16* rng, + uint16_t initstate) +{ + rng->state = 0U; + pcg_oneseq_16_step_r(rng); + rng->state += initstate; + pcg_oneseq_16_step_r(rng); +} + +inline void pcg_mcg_16_srandom_r(struct pcg_state_16* rng, uint16_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_16_srandom_r(struct pcg_state_16* rng, + uint16_t initstate) +{ + rng->state = 0U; + pcg_unique_16_step_r(rng); + rng->state += initstate; + pcg_unique_16_step_r(rng); +} + +inline void pcg_setseq_16_srandom_r(struct pcg_state_setseq_16* rng, + uint16_t initstate, uint16_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_16_step_r(rng); + rng->state += initstate; + pcg_setseq_16_step_r(rng); +} + +inline void pcg_oneseq_32_srandom_r(struct pcg_state_32* rng, + uint32_t initstate) +{ + rng->state = 0U; + pcg_oneseq_32_step_r(rng); + rng->state += initstate; + pcg_oneseq_32_step_r(rng); +} + +inline void pcg_mcg_32_srandom_r(struct pcg_state_32* rng, uint32_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_32_srandom_r(struct pcg_state_32* rng, + uint32_t initstate) +{ + rng->state = 0U; + pcg_unique_32_step_r(rng); + rng->state += 
initstate; + pcg_unique_32_step_r(rng); +} + +inline void pcg_setseq_32_srandom_r(struct pcg_state_setseq_32* rng, + uint32_t initstate, uint32_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_32_step_r(rng); + rng->state += initstate; + pcg_setseq_32_step_r(rng); +} + +inline void pcg_oneseq_64_srandom_r(struct pcg_state_64* rng, + uint64_t initstate) +{ + rng->state = 0U; + pcg_oneseq_64_step_r(rng); + rng->state += initstate; + pcg_oneseq_64_step_r(rng); +} + +inline void pcg_mcg_64_srandom_r(struct pcg_state_64* rng, uint64_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_64_srandom_r(struct pcg_state_64* rng, + uint64_t initstate) +{ + rng->state = 0U; + pcg_unique_64_step_r(rng); + rng->state += initstate; + pcg_unique_64_step_r(rng); +} + +inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64* rng, + uint64_t initstate, uint64_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_srandom_r(struct pcg_state_128* rng, + pcg128_t initstate) +{ + rng->state = 0U; + pcg_oneseq_128_step_r(rng); + rng->state += initstate; + pcg_oneseq_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_srandom_r(struct pcg_state_128* rng, pcg128_t initstate) +{ + rng->state = initstate | 1u; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_srandom_r(struct pcg_state_128* rng, + pcg128_t initstate) +{ + rng->state = 0U; + pcg_unique_128_step_r(rng); + rng->state += initstate; + pcg_unique_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_srandom_r(struct pcg_state_setseq_128* rng, + pcg128_t initstate, pcg128_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} +#endif + +/* Now, 
finally we create each of the individual generators. We provide + * a random_r function that provides a random number of the appropriate + * type (using the full range of the type) and a boundedrand_r version + * that provides a random number in the range [0, bound). + * + * Implementation notes for boundedrand_r: + * + * To avoid bias, we need to make the range of the RNG a multiple of + * bound, which we do by dropping output less than a threshold. + * Let's consider a 32-bit case... A naive scheme to calculate the + * threshold would be to do + * + * uint32_t threshold = 0x100000000ull % bound; + * + * but 64-bit div/mod is slower than 32-bit div/mod (especially on + * 32-bit platforms). In essence, we do + * + * uint32_t threshold = (0x100000000ull-bound) % bound; + * + * because this version will calculate the same modulus, but the LHS + * value is less than 2^32. + * + * (Note that using modulo is only wise for good RNGs, poorer RNGs + * such as raw LCGs do better using a technique based on division.) + * Empirical tests show that division is preferable to modulus for + * reducing the range of an RNG. It's faster, and sometimes it can + * even be statistically preferable. 
+ */ + +/* Generation functions for XSH RS */ + +inline uint8_t pcg_oneseq_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t 
oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_16_xsh_rs_8_random_r(struct pcg_state_setseq_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t 
+pcg_setseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_setseq_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_setseq_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t 
threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSH RR */ + +inline uint8_t pcg_oneseq_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % 
bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + 
pcg_unique_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_16_xsh_rr_8_random_r(struct pcg_state_setseq_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_setseq_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t 
+pcg_setseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_setseq_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = 
((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for RXS M XS (no MCG versions because they + * don't make sense when you want to use the entire state) + */ + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_random_r(struct pcg_state_8* rng) +{ + uint8_t oldstate = rng->state; + pcg_oneseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_8* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_16_rxs_m_xs_16_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_oneseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % 
bound; + for (;;) { + uint16_t r = pcg_oneseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_32_rxs_m_xs_32_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_oneseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_oneseq_64_rxs_m_xs_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_oneseq_128_rxs_m_xs_128_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint16_t pcg_unique_16_rxs_m_xs_16_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_unique_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return 
r % bound; + } +} + +inline uint32_t pcg_unique_32_rxs_m_xs_32_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_unique_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_unique_64_rxs_m_xs_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_unique_128_rxs_m_xs_128_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_8_rxs_m_xs_8_random_r(struct pcg_state_setseq_8* rng) +{ + uint8_t oldstate = rng->state; + pcg_setseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_setseq_8* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_random_r(struct pcg_state_setseq_16* rng) +{ + 
uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_setseq_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_setseq_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_setseq_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_setseq_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR (only defined for "large" types) */ + +inline uint32_t pcg_oneseq_64_xsl_rr_32_random_r(struct 
pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_unique_64_xsl_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t +pcg_setseq_64_xsl_rr_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return 
pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_mcg_64_xsl_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR RR (only defined for "large" types) */ + +inline uint64_t pcg_oneseq_64_xsl_rr_rr_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} 
+/* + * Bounded draw from the oneseq 64-bit XSL RR RR generator. Outputs of + * pcg_oneseq_64_xsl_rr_rr_64_random_r below threshold = -bound % bound + * are discarded; the first one at or above it is returned modulo bound. + * bound must be nonzero, since the code evaluates `% bound`. + */ +inline uint64_t +pcg_oneseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* + * Draw from the oneseq 128-bit XSL RR RR generator. The state is stepped + * first and the output function is applied to the new rng->state; the + * 64-bit XSL RR RR variants instead output from the state saved before + * stepping. + */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_oneseq_128_xsl_rr_rr_128_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif +/* pcg128_t form of pcg_oneseq_64_xsl_rr_rr_64_boundedrand_r. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* + * Draw from the unique 64-bit XSL RR RR generator: remember the current + * state, step with pcg_unique_64_step_r, and return + * pcg_output_xsl_rr_rr_64_64() of the remembered state. + */ +inline uint64_t pcg_unique_64_xsl_rr_rr_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} +/* Rejection loop over pcg_unique_64_xsl_rr_rr_64_random_r; bound != 0. */ +inline uint64_t +pcg_unique_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* unique 128-bit draw: steps first, then outputs from the new state. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_unique_128_xsl_rr_rr_128_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif +/* Rejection loop over pcg_unique_128_xsl_rr_rr_128_random_r; bound != 0. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* setseq 64-bit draw: outputs from the state saved before stepping. */ +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} +/* Rejection loop over pcg_setseq_64_xsl_rr_rr_64_random_r; bound != 0. */ +inline uint64_t 
+pcg_setseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_setseq_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* setseq 128-bit draw: steps first, then outputs from the new state. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif +/* Rejection loop over pcg_setseq_128_xsl_rr_rr_128_random_r; bound != 0. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_setseq_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* + * pcg32 family: 64-bit state, 32-bit output. pcg32 is the setseq variant, + * pcg32s oneseq and pcg32u unique (all XSH RR); pcg32f is mcg with XSH RS. + */ +//// Typedefs +typedef struct pcg_state_setseq_64 pcg32_random_t; +typedef struct pcg_state_64 pcg32s_random_t; +typedef struct pcg_state_64 pcg32u_random_t; +typedef struct pcg_state_64 pcg32f_random_t; +//// random_r +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32s_random_r pcg_oneseq_64_xsh_rr_32_random_r +#define pcg32u_random_r pcg_unique_64_xsh_rr_32_random_r +#define pcg32f_random_r pcg_mcg_64_xsh_rs_32_random_r +//// boundedrand_r +#define pcg32_boundedrand_r pcg_setseq_64_xsh_rr_32_boundedrand_r +#define pcg32s_boundedrand_r pcg_oneseq_64_xsh_rr_32_boundedrand_r +#define pcg32u_boundedrand_r pcg_unique_64_xsh_rr_32_boundedrand_r +#define pcg32f_boundedrand_r pcg_mcg_64_xsh_rs_32_boundedrand_r +//// srandom_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32s_srandom_r pcg_oneseq_64_srandom_r +#define pcg32u_srandom_r pcg_unique_64_srandom_r +#define pcg32f_srandom_r pcg_mcg_64_srandom_r +//// advance_r +#define pcg32_advance_r pcg_setseq_64_advance_r +#define pcg32s_advance_r pcg_oneseq_64_advance_r +#define pcg32u_advance_r pcg_unique_64_advance_r +#define pcg32f_advance_r pcg_mcg_64_advance_r +/* + * pcg64 family (PCG_HAS_128BIT_OPS only): 128-bit state, 64-bit output + * through XSL RR, in setseq / oneseq (s) / unique (u) / mcg (f) variants. + */ +#if PCG_HAS_128BIT_OPS +//// Typedefs +typedef struct 
pcg_state_setseq_128 pcg64_random_t; +typedef struct pcg_state_128 pcg64s_random_t; +typedef struct pcg_state_128 pcg64u_random_t; +typedef struct pcg_state_128 pcg64f_random_t; +//// random_r +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64s_random_r pcg_oneseq_128_xsl_rr_64_random_r +#define pcg64u_random_r pcg_unique_128_xsl_rr_64_random_r +#define pcg64f_random_r pcg_mcg_128_xsl_rr_64_random_r +//// boundedrand_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64s_boundedrand_r pcg_oneseq_128_xsl_rr_64_boundedrand_r +#define pcg64u_boundedrand_r pcg_unique_128_xsl_rr_64_boundedrand_r +#define pcg64f_boundedrand_r pcg_mcg_128_xsl_rr_64_boundedrand_r +//// srandom_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64s_srandom_r pcg_oneseq_128_srandom_r +#define pcg64u_srandom_r pcg_unique_128_srandom_r +#define pcg64f_srandom_r pcg_mcg_128_srandom_r +//// advance_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define pcg64s_advance_r pcg_oneseq_128_advance_r +#define pcg64u_advance_r pcg_unique_128_advance_r +#define pcg64f_advance_r pcg_mcg_128_advance_r +#endif +/* + * pcgNsi family: oneseq RXS M XS generators whose output is as wide as + * their state (8, 16, 32 and 64 bits). + */ +//// Typedefs +typedef struct pcg_state_8 pcg8si_random_t; +typedef struct pcg_state_16 pcg16si_random_t; +typedef struct pcg_state_32 pcg32si_random_t; +typedef struct pcg_state_64 pcg64si_random_t; +//// random_r +#define pcg8si_random_r pcg_oneseq_8_rxs_m_xs_8_random_r +#define pcg16si_random_r pcg_oneseq_16_rxs_m_xs_16_random_r +#define pcg32si_random_r pcg_oneseq_32_rxs_m_xs_32_random_r +#define pcg64si_random_r pcg_oneseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8si_boundedrand_r pcg_oneseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16si_boundedrand_r pcg_oneseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32si_boundedrand_r pcg_oneseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64si_boundedrand_r pcg_oneseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8si_srandom_r pcg_oneseq_8_srandom_r +#define 
pcg16si_srandom_r pcg_oneseq_16_srandom_r +#define pcg32si_srandom_r pcg_oneseq_32_srandom_r +#define pcg64si_srandom_r pcg_oneseq_64_srandom_r +//// advance_r +#define pcg8si_advance_r pcg_oneseq_8_advance_r +#define pcg16si_advance_r pcg_oneseq_16_advance_r +#define pcg32si_advance_r pcg_oneseq_32_advance_r +#define pcg64si_advance_r pcg_oneseq_64_advance_r +/* 128-bit member of the pcgNsi family (PCG_HAS_128BIT_OPS only). */ +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_128 pcg128si_random_t; +#define pcg128si_random_r pcg_oneseq_128_rxs_m_xs_128_random_r +#define pcg128si_boundedrand_r pcg_oneseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128si_srandom_r pcg_oneseq_128_srandom_r +#define pcg128si_advance_r pcg_oneseq_128_advance_r +#endif +/* + * pcgNi family: setseq RXS M XS generators whose output is as wide as + * their state (8, 16, 32 and 64 bits). + */ +//// Typedefs +typedef struct pcg_state_setseq_8 pcg8i_random_t; +typedef struct pcg_state_setseq_16 pcg16i_random_t; +typedef struct pcg_state_setseq_32 pcg32i_random_t; +typedef struct pcg_state_setseq_64 pcg64i_random_t; +//// random_r +#define pcg8i_random_r pcg_setseq_8_rxs_m_xs_8_random_r +#define pcg16i_random_r pcg_setseq_16_rxs_m_xs_16_random_r +#define pcg32i_random_r pcg_setseq_32_rxs_m_xs_32_random_r +#define pcg64i_random_r pcg_setseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8i_boundedrand_r pcg_setseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16i_boundedrand_r pcg_setseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32i_boundedrand_r pcg_setseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64i_boundedrand_r pcg_setseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8i_srandom_r pcg_setseq_8_srandom_r +#define pcg16i_srandom_r pcg_setseq_16_srandom_r +#define pcg32i_srandom_r pcg_setseq_32_srandom_r +#define pcg64i_srandom_r pcg_setseq_64_srandom_r +//// advance_r +#define pcg8i_advance_r pcg_setseq_8_advance_r +#define pcg16i_advance_r pcg_setseq_16_advance_r +#define pcg32i_advance_r pcg_setseq_32_advance_r +#define pcg64i_advance_r pcg_setseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_setseq_128 pcg128i_random_t; +#define pcg128i_random_r 
pcg_setseq_128_rxs_m_xs_128_random_r +#define pcg128i_boundedrand_r pcg_setseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128i_srandom_r pcg_setseq_128_srandom_r +#define pcg128i_advance_r pcg_setseq_128_advance_r +#endif + +extern uint32_t pcg32_random(); +extern uint32_t pcg32_boundedrand(uint32_t bound); +extern void pcg32_srandom(uint64_t seed, uint64_t seq); +extern void pcg32_advance(uint64_t delta); + +#if PCG_HAS_128BIT_OPS +extern uint64_t pcg64_random(); +extern uint64_t pcg64_boundedrand(uint64_t bound); +extern void pcg64_srandom(pcg128_t seed, pcg128_t seq); +extern void pcg64_advance(pcg128_t delta); +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG32_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#define PCG32U_INITIALIZER PCG_STATE_UNIQUE_64_INITIALIZER +#define PCG32S_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#define PCG32F_INITIALIZER PCG_STATE_MCG_64_INITIALIZER + +#if PCG_HAS_128BIT_OPS +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#define PCG64U_INITIALIZER PCG_STATE_UNIQUE_128_INITIALIZER +#define PCG64S_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#define PCG64F_INITIALIZER PCG_STATE_MCG_128_INITIALIZER +#endif + +#define PCG8SI_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG16SI_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG32SI_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG64SI_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128SI_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG8I_INITIALIZER PCG_STATE_SETSEQ_8_INITIALIZER +#define PCG16I_INITIALIZER PCG_STATE_SETSEQ_16_INITIALIZER +#define PCG32I_INITIALIZER PCG_STATE_SETSEQ_32_INITIALIZER +#define PCG64I_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128I_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#endif + +#if __cplusplus +} +#endif + +#endif // PCG_VARIANTS_H_INCLUDED diff --git 
a/numpy/random/src/pcg64/LICENSE.md b/numpy/random/src/pcg64/LICENSE.md new file mode 100644 index 000000000..dd6a17ee8 --- /dev/null +++ b/numpy/random/src/pcg64/LICENSE.md @@ -0,0 +1,22 @@ +# PCG64 + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/pcg64/pcg64-benchmark.c b/numpy/random/src/pcg64/pcg64-benchmark.c new file mode 100644 index 000000000..76f3ec78c --- /dev/null +++ b/numpy/random/src/pcg64/pcg64-benchmark.c @@ -0,0 +1,42 @@ +/* + * cl pcg64-benchmark.c pcg64.c ../splitmix64/splitmix64.c /Ox + * Measure-Command { .\pcg64-benchmark.exe } + * + * gcc pcg64-benchmark.c pcg64.c ../splitmix64/splitmix64.c -O3 -o + * pcg64-benchmark + * time ./pcg64-benchmark + */ +#include "../splitmix64/splitmix64.h" +#include "pcg64.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { + pcg64_random_t rng; + uint64_t sum = 0, count = 0; + uint64_t seed = 0xDEADBEAF; + int i; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + rng.state = (__uint128_t)splitmix64_next(&seed) << 64; + rng.state |= splitmix64_next(&seed); + rng.inc = (__uint128_t)1; +#else + rng.state.high = splitmix64_next(&seed); + rng.state.low = splitmix64_next(&seed);
+ rng.inc.high = 0; + rng.inc.low = 1; +#endif + clock_t begin = clock(); + for (i = 0; i < N; i++) { + sum += pcg64_random_r(&rng); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/pcg64/pcg64-test-data-gen.c b/numpy/random/src/pcg64/pcg64-test-data-gen.c new file mode 100644 index 000000000..0c2b079a3 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64-test-data-gen.c @@ -0,0 +1,73 @@ +/* + * Generate testing csv files + * + * GCC only + * + * gcc pcg64-test-data-gen.c pcg64.orig.c ../splitmix64/splitmix64.c -o + * pcg64-test-data-gen + */ + +#include "pcg64.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + pcg64_random_t rng; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + __uint128_t temp, s, inc; + int i; + uint64_t store[N]; + s = (__uint128_t)seed; + inc = (__uint128_t)0; + pcg64_srandom_r(&rng, s, inc); + printf("0x%" PRIx64, (uint64_t)(rng.state >> 64)); /* high half of the 128-bit state */ + printf("%016" PRIx64 "\n", (uint64_t)rng.state); /* low half, zero-padded so both halves form one hex number */ + printf("0x%" PRIx64, (uint64_t)(rng.inc >> 64)); + printf("%016" PRIx64 "\n", (uint64_t)rng.inc); + for (i = 0; i < N; i++) { + store[i] = pcg64_random_r(&rng); + } + + FILE *fp; + fp = fopen("pcg64-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + state = seed = 0; + s = (__uint128_t)seed; + inc = (__uint128_t)0; /* use the 128-bit variable, not the int loop counter, for the sequence argument */ + pcg64_srandom_r(&rng, s, inc); + printf("0x%" PRIx64, (uint64_t)(rng.state >> 64)); + printf("%016" PRIx64 "\n", (uint64_t)rng.state); + printf("0x%" PRIx64, (uint64_t)(rng.inc >> 64)); + printf("%016" PRIx64 "\n",
(uint64_t)rng.inc); + for (i = 0; i < N; i++) { + store[i] = pcg64_random_r(&rng); + } + fp = fopen("pcg64-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/pcg64/pcg64.c b/numpy/random/src/pcg64/pcg64.c new file mode 100644 index 000000000..c7c1eb045 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.c @@ -0,0 +1,118 @@ +/* + * PCG64 Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * Copyright 2015 Robert Kern <robert.kern@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#include "pcg64.h" + +extern inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng); +extern inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state); +extern inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq); +extern inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng); +extern inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng, + uint64_t bound); +extern inline void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng, + pcg128_t delta); + +/* Multi-step advance functions (jump-ahead, jump-back) + * + * The method used here is based on Brown, "Random Number Generation + * with Arbitrary Stride,", Transactions of the American Nuclear + * Society (Nov. 1994). The algorithm is very similar to fast + * exponentiation. + * + * Even though delta is an unsigned integer, we can pass a + * signed integer to go backwards, it just goes "the long way round". 
+ */ + +#ifndef PCG_EMULATED_128BIT_MATH + +pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, pcg128_t cur_mult, + pcg128_t cur_plus) { + pcg128_t acc_mult = 1u; + pcg128_t acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + +#else + +pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, pcg128_t cur_mult, + pcg128_t cur_plus) { + pcg128_t acc_mult = PCG_128BIT_CONSTANT(0u, 1u); + pcg128_t acc_plus = PCG_128BIT_CONSTANT(0u, 0u); + while ((delta.high > 0) || (delta.low > 0)) { + if (delta.low & 1) { + acc_mult = _pcg128_mult(acc_mult, cur_mult); + acc_plus = _pcg128_add(_pcg128_mult(acc_plus, cur_mult), cur_plus); + } + cur_plus = _pcg128_mult(_pcg128_add(cur_mult, PCG_128BIT_CONSTANT(0u, 1u)), + cur_plus); + cur_mult = _pcg128_mult(cur_mult, cur_mult); + /* delta >>= 1 across both words: bit 0 of high must land in bit 63 of low (mirrors `delta /= 2` in the native branch). */ + delta.low = (delta.low >> 1) | (delta.high << 63); + delta.high >>= 1; + } + return _pcg128_add(_pcg128_mult(acc_mult, state), acc_plus); +} + +#endif + +extern inline uint64_t pcg64_next64(pcg64_state *state); +extern inline uint32_t pcg64_next32(pcg64_state *state); + +extern void pcg64_advance(pcg64_state *state, uint64_t *step) { /* step[0] = high 64 bits, step[1] = low 64 bits of the jump distance */ + pcg128_t delta; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + delta = (((pcg128_t)step[0]) << 64) | step[1]; +#else + delta.high = step[0]; + delta.low = step[1]; +#endif + pcg64_advance_r(state->pcg_state, delta); +} + +extern void pcg64_set_seed(pcg64_state *state, uint64_t *seed, uint64_t *inc) { /* seed[] and inc[] are each {high, low} 64-bit halves */ + pcg128_t s, i; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + s = (((pcg128_t)seed[0]) << 64) | seed[1]; + i = (((pcg128_t)inc[0]) << 64) | inc[1]; +#else + s.high = seed[0]; + s.low = seed[1]; + i.high = inc[0]; + i.low = inc[1]; +#endif + pcg64_srandom_r(state->pcg_state, s, i); +} diff --git a/numpy/random/src/pcg64/pcg64.h
b/numpy/random/src/pcg64/pcg64.h new file mode 100644 index 000000000..156c73a36 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.h @@ -0,0 +1,241 @@ +/* + * PCG64 Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * Copyright 2015 Robert Kern <robert.kern@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#ifndef PCG64_H_INCLUDED +#define PCG64_H_INCLUDED 1 + +#ifdef _WIN32 +#ifndef _INTTYPES +#include "../common/stdint.h" +#endif +#define inline __inline __forceinline +#else +#include <inttypes.h> +#if _MSC_VER >= 1900 && _M_AMD64 +#include <intrin.h> +#pragma intrinsic(_umul128) +#endif +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) +#error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. 
+#endif + +#if __cplusplus +extern "C" { +#endif + +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) +typedef __uint128_t pcg128_t; +#define PCG_128BIT_CONSTANT(high, low) (((pcg128_t)(high) << 64) + (low)) +#else +typedef struct { + uint64_t high; + uint64_t low; +} pcg128_t; + +static inline pcg128_t PCG_128BIT_CONSTANT(uint64_t high, uint64_t low) { + pcg128_t result; + result.high = high; + result.low = low; + return result; +} + +#define PCG_EMULATED_128BIT_MATH 1 +#endif + +typedef struct { pcg128_t state; } pcg_state_128; + +typedef struct { + pcg128_t state; + pcg128_t inc; +} pcg_state_setseq_128; + +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL) +#define PCG_STATE_SETSEQ_128_INITIALIZER \ + { \ + PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL) \ + , PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) \ + } + +static inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) { + return (value >> rot) | (value << ((-rot) & 63)); /* masking the left-shift count keeps rot == 0 well-defined */ +} + +#ifdef PCG_EMULATED_128BIT_MATH + +static inline pcg128_t _pcg128_add(pcg128_t a, pcg128_t b) { + pcg128_t result; + + result.low = a.low + b.low; + result.high = a.high + b.high + (result.low < b.low); /* (result.low < b.low) is the carry out of the low word */ + return result; +} + +static inline void _pcg_mult64(uint64_t x, uint64_t y, uint64_t *z1, + uint64_t *z0) { + +#if defined _WIN32 && _MSC_VER >= 1900 && _M_AMD64 + z0[0] = _umul128(x, y, z1); +#else + uint64_t x0, x1, y0, y1; + uint64_t w0, w1, w2, t; + /* Lower 64 bits are straightforward clock-arithmetic.
*/ + *z0 = x * y; + + x0 = x & 0xFFFFFFFFULL; + x1 = x >> 32; + y0 = y & 0xFFFFFFFFULL; + y1 = y >> 32; + w0 = x0 * y0; + t = x1 * y0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 += x0 * y1; + *z1 = x1 * y1 + w2 + (w1 >> 32); +#endif + +} + +static inline pcg128_t _pcg128_mult(pcg128_t a, pcg128_t b) { + uint64_t h1; + pcg128_t result; + + h1 = a.high * b.low + a.low * b.high; + _pcg_mult64(a.low, b.low, &(result.high), &(result.low)); + result.high += h1; + return result; +} + +static inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng) { + rng->state = _pcg128_add(_pcg128_mult(rng->state, PCG_DEFAULT_MULTIPLIER_128), + rng->inc); +} + +static inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(state.high ^ state.low, state.high >> 58u); +} + +static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq) { + rng->state = PCG_128BIT_CONSTANT(0ULL, 0ULL); + rng->inc.high = initseq.high << 1u; + rng->inc.high |= initseq.low >> 63; /* carry the top bit of low into high, so inc == (initseq << 1) | 1 as in the native branch */ + rng->inc.low = (initseq.low << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state = _pcg128_add(rng->state, initstate); + pcg_setseq_128_step_r(rng); +} + +#else /* PCG_EMULATED_128BIT_MATH */ + +static inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} + +static inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} + +static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} + +#endif /* PCG_EMULATED_128BIT_MATH */ + +static inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return
pcg_output_xsl_rr_128_64(rng->state); +} + +static inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); + +static inline void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} + +typedef pcg_state_setseq_128 pcg64_random_t; +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER + +#if __cplusplus +} +#endif + +typedef struct s_pcg64_state { + pcg64_random_t *pcg_state; + int has_uint32; + uint32_t uinteger; +} pcg64_state; + +static inline uint64_t pcg64_next64(pcg64_state *state) { + return pcg64_random_r(state->pcg_state); +} + +static inline uint32_t pcg64_next32(pcg64_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = pcg64_random_r(state->pcg_state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void pcg64_advance(pcg64_state *state, uint64_t *step); + +void pcg64_set_seed(pcg64_state *state, uint64_t *seed, uint64_t *inc); + +#endif /* PCG64_H_INCLUDED */ diff --git a/numpy/random/src/pcg64/pcg64.orig.c b/numpy/random/src/pcg64/pcg64.orig.c new file mode 100644 index 000000000..07e97e4b6 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.orig.c @@ -0,0 +1,11 @@ +#include "pcg64.orig.h" + +extern inline void pcg_setseq_128_srandom_r(pcg64_random_t *rng, + pcg128_t 
initstate, + pcg128_t initseq); + +extern uint64_t pcg_rotr_64(uint64_t value, unsigned int rot); +extern inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state); +extern void pcg_setseq_128_step_r(struct pcg_state_setseq_128 *rng); +extern uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128 *rng); diff --git a/numpy/random/src/pcg64/pcg64.orig.h b/numpy/random/src/pcg64/pcg64.orig.h new file mode 100644 index 000000000..74be91f31 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.orig.h @@ -0,0 +1,2025 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Much of the derivation was performed mechanically. In particular, the + * output functions were generated by compiling the C++ output functions + * into LLVM bitcode and then transforming that using the LLVM C backend + * (from https://github.com/draperlaboratory/llvm-cbe), and then + * postprocessing and hand editing the output. + * + * Much of the remaining code was generated by C-preprocessor metaprogramming. 
+ */ + +#ifndef PCG_VARIANTS_H_INCLUDED +#define PCG_VARIANTS_H_INCLUDED 1 + +#include <inttypes.h> + +#if __SIZEOF_INT128__ +typedef __uint128_t pcg128_t; +#define PCG_128BIT_CONSTANT(high, low) ((((pcg128_t)high) << 64) + low) +#define PCG_HAS_128BIT_OPS 1 +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) +#error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. +// We could instead use macros PCG_INLINE and PCG_EXTERN_INLINE +// but better to just reject ancient C code. +#endif + +#if __cplusplus +extern "C" { +#endif + +/* + * Rotate helper functions. + */ + +inline uint8_t pcg_rotr_8(uint8_t value, unsigned int rot) { +/* Unfortunately, clang is kinda pathetic when it comes to properly + * recognizing idiomatic rotate code, so for clang we actually provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + */ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorb %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 7)); +#endif +} + +inline uint16_t pcg_rotr_16(uint16_t value, unsigned int rot) { +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorw %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 15)); +#endif +} + +inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) { +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorl %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 31)); +#endif +} + +inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) { +#if 0 && PCG_USE_INLINE_ASM && __clang__ && __x86_64__ + // For whatever reason, clang actually *does* generate rotq by + // itself, so we don't need this code. 
+ asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 63)); +#endif +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_rotr_128(pcg128_t value, unsigned int rot) { + return (value >> rot) | (value << ((-rot) & 127)); +} +#endif + +/* + * Output functions. These are the core of the PCG generation scheme. + */ + +// XSH RS + +inline uint8_t pcg_output_xsh_rs_16_8(uint16_t state) { + return (uint8_t)(((state >> 7u) ^ state) >> ((state >> 14u) + 3u)); +} + +inline uint16_t pcg_output_xsh_rs_32_16(uint32_t state) { + return (uint16_t)(((state >> 11u) ^ state) >> ((state >> 30u) + 11u)); +} + +inline uint32_t pcg_output_xsh_rs_64_32(uint64_t state) { + + return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rs_128_64(pcg128_t state) { + return (uint64_t)(((state >> 43u) ^ state) >> ((state >> 124u) + 45u)); +} +#endif + +// XSH RR + +inline uint8_t pcg_output_xsh_rr_16_8(uint16_t state) { + return pcg_rotr_8(((state >> 5u) ^ state) >> 5u, state >> 13u); +} + +inline uint16_t pcg_output_xsh_rr_32_16(uint32_t state) { + return pcg_rotr_16(((state >> 10u) ^ state) >> 12u, state >> 28u); +} + +inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) { + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((state >> 29u) ^ state) >> 58u, state >> 122u); +} +#endif + +// RXS M XS + +inline uint8_t pcg_output_rxs_m_xs_8_8(uint8_t state) { + uint8_t word = ((state >> ((state >> 6u) + 2u)) ^ state) * 217u; + return (word >> 6u) ^ word; +} + +inline uint16_t pcg_output_rxs_m_xs_16_16(uint16_t state) { + uint16_t word = ((state >> ((state >> 13u) + 3u)) ^ state) * 62169u; + return (word >> 11u) ^ word; +} + +inline uint32_t pcg_output_rxs_m_xs_32_32(uint32_t state) { + uint32_t word = ((state >> ((state >> 
28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +inline uint64_t pcg_output_rxs_m_xs_64_64(uint64_t state) { + uint64_t word = + ((state >> ((state >> 59u) + 5u)) ^ state) * 12605985483714917081ull; + return (word >> 43u) ^ word; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_rxs_m_xs_128_128(pcg128_t state) { + pcg128_t word = + ((state >> ((state >> 122u) + 6u)) ^ state) * + (PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)); + // 327738287884841127335028083622016905945 + return (word >> 86u) ^ word; +} +#endif + +// XSL RR (only defined for >= 64 bits) + +inline uint32_t pcg_output_xsl_rr_64_32(uint64_t state) { + return pcg_rotr_32(((uint32_t)(state >> 32u)) ^ (uint32_t)state, + state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} +#endif + +// XSL RR RR (only defined for >= 64 bits) + +inline uint64_t pcg_output_xsl_rr_rr_64_64(uint64_t state) { + uint32_t rot1 = (uint32_t)(state >> 59u); + uint32_t high = (uint32_t)(state >> 32u); + uint32_t low = (uint32_t)state; + uint32_t xored = high ^ low; + uint32_t newlow = pcg_rotr_32(xored, rot1); + uint32_t newhigh = pcg_rotr_32(high, newlow & 31u); + return (((uint64_t)newhigh) << 32u) | newlow; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_xsl_rr_rr_128_128(pcg128_t state) { + uint32_t rot1 = (uint32_t)(state >> 122u); + uint64_t high = (uint64_t)(state >> 64u); + uint64_t low = (uint64_t)state; + uint64_t xored = high ^ low; + uint64_t newlow = pcg_rotr_64(xored, rot1); + uint64_t newhigh = pcg_rotr_64(high, newlow & 63u); + return (((pcg128_t)newhigh) << 64u) | newlow; +} +#endif + +#define PCG_DEFAULT_MULTIPLIER_8 141U +#define PCG_DEFAULT_MULTIPLIER_16 12829U +#define PCG_DEFAULT_MULTIPLIER_32 747796405U +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +#define PCG_DEFAULT_INCREMENT_8 77U +#define 
PCG_DEFAULT_INCREMENT_16 47989U +#define PCG_DEFAULT_INCREMENT_32 2891336453U +#define PCG_DEFAULT_INCREMENT_64 1442695040888963407ULL + +#if PCG_HAS_128BIT_OPS +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL) +#endif + + /* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG_STATE_ONESEQ_8_INITIALIZER \ + { 0xd7U } +#define PCG_STATE_ONESEQ_16_INITIALIZER \ + { 0x20dfU } +#define PCG_STATE_ONESEQ_32_INITIALIZER \ + { 0x46b56677U } +#define PCG_STATE_ONESEQ_64_INITIALIZER \ + { 0x4d595df4d0f33173ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_ONESEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0xb8dc10e158a92392ULL, 0x98046df007ec0a53ULL) } +#endif + +#define PCG_STATE_UNIQUE_8_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG_STATE_UNIQUE_16_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG_STATE_UNIQUE_32_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG_STATE_UNIQUE_64_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_UNIQUE_128_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG_STATE_MCG_8_INITIALIZER \ + { 0xe5U } +#define PCG_STATE_MCG_16_INITIALIZER \ + { 0xa5e5U } +#define PCG_STATE_MCG_32_INITIALIZER \ + { 0xd15ea5e5U } +#define PCG_STATE_MCG_64_INITIALIZER \ + { 0xcafef00dd15ea5e5ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_MCG_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x0000000000000000ULL, 0xcafef00dd15ea5e5ULL) } +#endif + +#define PCG_STATE_SETSEQ_8_INITIALIZER \ + { 0x9bU, 0xdbU } +#define PCG_STATE_SETSEQ_16_INITIALIZER \ + { 0xe39bU, 0x5bdbU } +#define PCG_STATE_SETSEQ_32_INITIALIZER \ + { 0xec02d89bU, 0x94b95bdbU } +#define PCG_STATE_SETSEQ_64_INITIALIZER \ + { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } +#if PCG_HAS_128BIT_OPS +#define 
PCG_STATE_SETSEQ_128_INITIALIZER \ + { \ + PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL) \ + , PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) \ + } +#endif + +/* Representations for the oneseq, mcg, and unique variants */ + +struct pcg_state_8 { + uint8_t state; +}; + +struct pcg_state_16 { + uint16_t state; +}; + +struct pcg_state_32 { + uint32_t state; +}; + +struct pcg_state_64 { + uint64_t state; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_128 { + pcg128_t state; +}; +#endif + +/* Representations setseq variants */ + +struct pcg_state_setseq_8 { + uint8_t state; + uint8_t inc; +}; + +struct pcg_state_setseq_16 { + uint16_t state; + uint16_t inc; +}; + +struct pcg_state_setseq_32 { + uint32_t state; + uint32_t inc; +}; + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_setseq_128 { + pcg128_t state; + pcg128_t inc; +}; +#endif + +/* Multi-step advance functions (jump-ahead, jump-back) */ + +extern uint8_t pcg_advance_lcg_8(uint8_t state, uint8_t delta, uint8_t cur_mult, + uint8_t cur_plus); +extern uint16_t pcg_advance_lcg_16(uint16_t state, uint16_t delta, + uint16_t cur_mult, uint16_t cur_plus); +extern uint32_t pcg_advance_lcg_32(uint32_t state, uint32_t delta, + uint32_t cur_mult, uint32_t cur_plus); +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +#if PCG_HAS_128BIT_OPS +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); +#endif + +/* Functions to advance the underlying LCG, one version for each size and + * each style. These functions are considered semi-private. There is rarely + * a good reason to call them directly. 
+ */ + +inline void pcg_oneseq_8_step_r(struct pcg_state_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + PCG_DEFAULT_INCREMENT_8; +} + +inline void pcg_oneseq_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + PCG_DEFAULT_INCREMENT_8); +} + +inline void pcg_mcg_8_step_r(struct pcg_state_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8; +} + +inline void pcg_mcg_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = + pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, 0u); +} + +inline void pcg_unique_8_step_r(struct pcg_state_8 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_8 + (uint8_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + (uint8_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_8_step_r(struct pcg_state_setseq_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + rng->inc; +} + +inline void pcg_setseq_8_advance_r(struct pcg_state_setseq_8 *rng, + uint8_t delta) { + rng->state = + pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, rng->inc); +} + +inline void pcg_oneseq_16_step_r(struct pcg_state_16 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_16 + PCG_DEFAULT_INCREMENT_16; +} + +inline void pcg_oneseq_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + PCG_DEFAULT_INCREMENT_16); +} + +inline void pcg_mcg_16_step_r(struct pcg_state_16 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16; +} + +inline void pcg_mcg_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = + pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, 0u); +} + +inline void pcg_unique_16_step_r(struct pcg_state_16 *rng) { + 
rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_16 + (uint16_t)(((intptr_t)rng) | 1u); +} +/* unique advance: passes the same pointer-derived increment used by the step. */ +inline void pcg_unique_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + (uint16_t)(((intptr_t)rng) | 1u)); +} +/* setseq: the increment is read from rng->inc. */ +inline void pcg_setseq_16_step_r(struct pcg_state_setseq_16 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + rng->inc; +} + +inline void pcg_setseq_16_advance_r(struct pcg_state_setseq_16 *rng, + uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + rng->inc); +} +/* The same four styles for 32-bit state, using the _32 constants. */ +inline void pcg_oneseq_32_step_r(struct pcg_state_32 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_32 + PCG_DEFAULT_INCREMENT_32; +} + +inline void pcg_oneseq_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + PCG_DEFAULT_INCREMENT_32); +} +/* mcg: multiply only; the matching advance call passes 0u as the increment. */ +inline void pcg_mcg_32_step_r(struct pcg_state_32 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32; +} + +inline void pcg_mcg_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = + pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, 0u); +} +/* unique: the increment is the rng pointer value OR-ed with 1, cast to uint32_t. */ +inline void pcg_unique_32_step_r(struct pcg_state_32 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_32 + (uint32_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + (uint32_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_32_step_r(struct pcg_state_setseq_32 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + rng->inc; +} + +inline void pcg_setseq_32_advance_r(struct pcg_state_setseq_32 *rng, + uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + rng->inc); +} + +inline void pcg_oneseq_64_step_r(struct 
pcg_state_64 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_64 + PCG_DEFAULT_INCREMENT_64; +} + +inline void pcg_oneseq_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + PCG_DEFAULT_INCREMENT_64); +} +/* mcg: multiply only; the matching advance call passes 0u as the increment. */ +inline void pcg_mcg_64_step_r(struct pcg_state_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64; +} + +inline void pcg_mcg_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = + pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, 0u); +} +/* unique: the increment is the rng pointer value OR-ed with 1, cast to uint64_t. */ +inline void pcg_unique_64_step_r(struct pcg_state_64 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_64 + (uint64_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + (uint64_t)(((intptr_t)rng) | 1u)); +} +/* setseq: the increment is read from rng->inc. */ +inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64 *rng, + uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + rng->inc); +} +/* The 128-bit versions are compiled only when PCG_HAS_128BIT_OPS is true. */ +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_step_r(struct pcg_state_128 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_128 + PCG_DEFAULT_INCREMENT_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_advance_r(struct pcg_state_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, PCG_DEFAULT_INCREMENT_128); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_step_r(struct pcg_state_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_advance_r(struct pcg_state_128 *rng, pcg128_t delta) { + rng->state = + 
pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, 0u); +} +#endif +/* unique: the increment is the rng pointer value OR-ed with 1, cast to pcg128_t. */ +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_step_r(struct pcg_state_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + (pcg128_t)(((intptr_t)rng) | 1u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_advance_r(struct pcg_state_128 *rng, + pcg128_t delta) { + rng->state = + pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + (pcg128_t)(((intptr_t)rng) | 1u)); +} +#endif +/* setseq: the increment is read from rng->inc. */ +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_step_r(struct pcg_state_setseq_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_advance_r(struct pcg_state_setseq_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} +#endif + +/* Functions to seed the RNG state, one version for each size and each + * style. Unlike the step functions, regular users can and should call + * these functions. 
+ */ +/* oneseq/unique seeding: zero the state, step, add initstate, step again. */ +inline void pcg_oneseq_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = 0U; + pcg_oneseq_8_step_r(rng); + rng->state += initstate; + pcg_oneseq_8_step_r(rng); +} +/* mcg seeding: the state is initstate with its low bit forced to 1; no step. */ +inline void pcg_mcg_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = 0U; + pcg_unique_8_step_r(rng); + rng->state += initstate; + pcg_unique_8_step_r(rng); +} +/* setseq seeding: inc = (initseq << 1) | 1 stored in the inc width, so inc is odd. */ +inline void pcg_setseq_8_srandom_r(struct pcg_state_setseq_8 *rng, + uint8_t initstate, uint8_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_8_step_r(rng); + rng->state += initstate; + pcg_setseq_8_step_r(rng); +} +/* The same four seeding styles for 16-bit state. */ +inline void pcg_oneseq_16_srandom_r(struct pcg_state_16 *rng, + uint16_t initstate) { + rng->state = 0U; + pcg_oneseq_16_step_r(rng); + rng->state += initstate; + pcg_oneseq_16_step_r(rng); +} + +inline void pcg_mcg_16_srandom_r(struct pcg_state_16 *rng, uint16_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_16_srandom_r(struct pcg_state_16 *rng, + uint16_t initstate) { + rng->state = 0U; + pcg_unique_16_step_r(rng); + rng->state += initstate; + pcg_unique_16_step_r(rng); +} + +inline void pcg_setseq_16_srandom_r(struct pcg_state_setseq_16 *rng, + uint16_t initstate, uint16_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_16_step_r(rng); + rng->state += initstate; + pcg_setseq_16_step_r(rng); +} +/* The same four seeding styles for 32-bit state. */ +inline void pcg_oneseq_32_srandom_r(struct pcg_state_32 *rng, + uint32_t initstate) { + rng->state = 0U; + pcg_oneseq_32_step_r(rng); + rng->state += initstate; + pcg_oneseq_32_step_r(rng); +} + +inline void pcg_mcg_32_srandom_r(struct pcg_state_32 *rng, uint32_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_32_srandom_r(struct pcg_state_32 *rng, + uint32_t initstate) { + rng->state = 0U; + pcg_unique_32_step_r(rng); + rng->state += initstate; + 
pcg_unique_32_step_r(rng); +} +/* setseq seeding: inc = (initseq << 1) | 1 stored in the inc width, so inc is odd. */ +inline void pcg_setseq_32_srandom_r(struct pcg_state_setseq_32 *rng, + uint32_t initstate, uint32_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_32_step_r(rng); + rng->state += initstate; + pcg_setseq_32_step_r(rng); +} +/* oneseq/unique seeding: zero the state, step, add initstate, step again. */ +inline void pcg_oneseq_64_srandom_r(struct pcg_state_64 *rng, + uint64_t initstate) { + rng->state = 0U; + pcg_oneseq_64_step_r(rng); + rng->state += initstate; + pcg_oneseq_64_step_r(rng); +} +/* mcg seeding: the state is initstate with its low bit forced to 1; no step. */ +inline void pcg_mcg_64_srandom_r(struct pcg_state_64 *rng, uint64_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_64_srandom_r(struct pcg_state_64 *rng, + uint64_t initstate) { + rng->state = 0U; + pcg_unique_64_step_r(rng); + rng->state += initstate; + pcg_unique_64_step_r(rng); +} + +inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64 *rng, + uint64_t initstate, uint64_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} +/* The 128-bit seeding functions are compiled only when PCG_HAS_128BIT_OPS is true. */ +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = 0U; + pcg_oneseq_128_step_r(rng); + rng->state += initstate; + pcg_oneseq_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = initstate | 1u; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = 0U; + pcg_unique_128_step_r(rng); + rng->state += initstate; + pcg_unique_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_srandom_r(struct pcg_state_setseq_128 *rng, + pcg128_t initstate, pcg128_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} +#endif + +/* Now, finally we create each of 
the individual generators. We provide + * a random_r function that provides a random number of the appropriate + * type (using the full range of the type) and a boundedrand_r version + * that provides a random number in the range [0, bound). + * + * Implementation notes for boundedrand_r: + * + * To avoid bias, we need to make the range of the RNG a multiple of + * bound, which we do by dropping output less than a threshold. + * Let's consider a 32-bit case... A naive scheme to calculate the + * threshold would be to do + * + * uint32_t threshold = 0x100000000ull % bound; + * + * but 64-bit div/mod is slower than 32-bit div/mod (especially on + * 32-bit platforms). In essence, we do + * + * uint32_t threshold = (0x100000000ull-bound) % bound; + * + * because this version will calculate the same modulus, but the LHS + * value is less than 2^32. + * + * (Note that using modulo is only wise for good RNGs, poorer RNGs + * such as raw LCGs do better using a technique based on division.) + * Empirical tests show that division is preferable to modulus for + * reducing the range of an RNG. It's faster, and sometimes it can + * even be statistically preferable. 
+ */ + +/* Generation functions for XSH RS */ +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint8_t pcg_oneseq_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint8_t pcg_oneseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state, unlike the narrower variants. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = 
rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint8_t pcg_unique_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint16_t pcg_unique_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* setseq variants take the struct that also carries inc. */ +inline uint8_t +pcg_setseq_16_xsh_rs_8_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t 
+pcg_setseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint16_t +pcg_setseq_32_xsh_rs_16_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint16_t +pcg_setseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_setseq_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* mcg variants reuse the plain pcg_state_N structs and step with pcg_mcg_N_step_r. */ +inline uint8_t pcg_mcg_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = 
((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint16_t pcg_mcg_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint16_t pcg_mcg_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSH RR */ +/* Same structure as the XSH RS functions, calling the pcg_output_xsh_rr_* outputs instead. */ +inline uint8_t pcg_oneseq_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + 
+inline uint16_t pcg_oneseq_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint16_t pcg_oneseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t pcg_oneseq_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* unique variants step with pcg_unique_N_step_r (pointer-derived increment). */ +inline uint8_t pcg_unique_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return 
pcg_output_xsh_rr_32_16(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint16_t pcg_unique_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t pcg_unique_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* setseq variants take the struct that also carries inc. */ +inline uint8_t +pcg_setseq_16_xsh_rr_8_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_setseq_32 
*rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint32_t +pcg_setseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* mcg variants reuse the plain pcg_state_N structs and step with pcg_mcg_N_step_r. */ +inline uint8_t pcg_mcg_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = 
pcg_mcg_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t pcg_mcg_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint32_t pcg_mcg_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state. */ +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for RXS M XS (no MCG versions because they + * don't make sense when you want to use the entire state) + */ +/* Here the output type has the same width as the state type. */ +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_random_r(struct pcg_state_8 *rng) { + uint8_t oldstate = rng->state; + pcg_oneseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_8 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_16_rxs_m_xs_16_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_oneseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = 
pcg_oneseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t pcg_oneseq_32_rxs_m_xs_32_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint32_t +pcg_oneseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_oneseq_64_rxs_m_xs_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state; returns pcg128_t. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* unique variants step with pcg_unique_N_step_r (pointer-derived increment). */ +inline uint16_t pcg_unique_16_rxs_m_xs_16_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_unique_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t 
pcg_unique_32_rxs_m_xs_32_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint32_t +pcg_unique_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint64_t pcg_unique_64_rxs_m_xs_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state; returns pcg128_t. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif +/* setseq variants take the struct that also carries inc. */ +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_random_r(struct pcg_state_setseq_8 *rng) { + uint8_t oldstate = rng->state; + pcg_setseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_setseq_8 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + 
pcg_setseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} +/* boundedrand_r: redraws until r >= threshold, then returns r % bound; bound must be non-zero. */ +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* random_r: saves the state, steps the LCG, outputs from the saved (pre-step) state. */ +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_setseq_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +/* 128-bit state: steps first and outputs from the post-step rng->state; returns pcg128_t. */ +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_setseq_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR (only defined for "large" types) */ +/* In this chunk XSL RR is defined for 64-bit and 128-bit state only. */ +inline uint32_t pcg_oneseq_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = 
rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_unique_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t +pcg_setseq_64_xsl_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t 
+pcg_setseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_mcg_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR RR (only defined for "large" types) */ + +inline uint64_t pcg_oneseq_64_xsl_rr_rr_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t 
+pcg_oneseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t pcg_unique_64_xsl_rr_rr_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_boundedrand_r(struct 
pcg_state_setseq_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_setseq_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +//// Typedefs +typedef struct pcg_state_setseq_64 pcg32_random_t; +typedef struct pcg_state_64 pcg32s_random_t; +typedef struct pcg_state_64 pcg32u_random_t; +typedef struct pcg_state_64 pcg32f_random_t; +//// random_r +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32s_random_r pcg_oneseq_64_xsh_rr_32_random_r +#define pcg32u_random_r pcg_unique_64_xsh_rr_32_random_r +#define pcg32f_random_r pcg_mcg_64_xsh_rs_32_random_r +//// boundedrand_r +#define pcg32_boundedrand_r pcg_setseq_64_xsh_rr_32_boundedrand_r +#define pcg32s_boundedrand_r pcg_oneseq_64_xsh_rr_32_boundedrand_r +#define pcg32u_boundedrand_r pcg_unique_64_xsh_rr_32_boundedrand_r +#define pcg32f_boundedrand_r pcg_mcg_64_xsh_rs_32_boundedrand_r +//// srandom_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32s_srandom_r pcg_oneseq_64_srandom_r +#define pcg32u_srandom_r pcg_unique_64_srandom_r +#define pcg32f_srandom_r pcg_mcg_64_srandom_r +//// advance_r +#define pcg32_advance_r pcg_setseq_64_advance_r +#define pcg32s_advance_r pcg_oneseq_64_advance_r +#define pcg32u_advance_r pcg_unique_64_advance_r +#define pcg32f_advance_r pcg_mcg_64_advance_r + +#if PCG_HAS_128BIT_OPS +//// Typedefs +typedef struct pcg_state_setseq_128 pcg64_random_t; +typedef struct 
pcg_state_128 pcg64s_random_t; +typedef struct pcg_state_128 pcg64u_random_t; +typedef struct pcg_state_128 pcg64f_random_t; +//// random_r +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64s_random_r pcg_oneseq_128_xsl_rr_64_random_r +#define pcg64u_random_r pcg_unique_128_xsl_rr_64_random_r +#define pcg64f_random_r pcg_mcg_128_xsl_rr_64_random_r +//// boundedrand_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64s_boundedrand_r pcg_oneseq_128_xsl_rr_64_boundedrand_r +#define pcg64u_boundedrand_r pcg_unique_128_xsl_rr_64_boundedrand_r +#define pcg64f_boundedrand_r pcg_mcg_128_xsl_rr_64_boundedrand_r +//// srandom_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64s_srandom_r pcg_oneseq_128_srandom_r +#define pcg64u_srandom_r pcg_unique_128_srandom_r +#define pcg64f_srandom_r pcg_mcg_128_srandom_r +//// advance_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define pcg64s_advance_r pcg_oneseq_128_advance_r +#define pcg64u_advance_r pcg_unique_128_advance_r +#define pcg64f_advance_r pcg_mcg_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_8 pcg8si_random_t; +typedef struct pcg_state_16 pcg16si_random_t; +typedef struct pcg_state_32 pcg32si_random_t; +typedef struct pcg_state_64 pcg64si_random_t; +//// random_r +#define pcg8si_random_r pcg_oneseq_8_rxs_m_xs_8_random_r +#define pcg16si_random_r pcg_oneseq_16_rxs_m_xs_16_random_r +#define pcg32si_random_r pcg_oneseq_32_rxs_m_xs_32_random_r +#define pcg64si_random_r pcg_oneseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8si_boundedrand_r pcg_oneseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16si_boundedrand_r pcg_oneseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32si_boundedrand_r pcg_oneseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64si_boundedrand_r pcg_oneseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8si_srandom_r pcg_oneseq_8_srandom_r +#define pcg16si_srandom_r pcg_oneseq_16_srandom_r +#define 
pcg32si_srandom_r pcg_oneseq_32_srandom_r +#define pcg64si_srandom_r pcg_oneseq_64_srandom_r +//// advance_r +#define pcg8si_advance_r pcg_oneseq_8_advance_r +#define pcg16si_advance_r pcg_oneseq_16_advance_r +#define pcg32si_advance_r pcg_oneseq_32_advance_r +#define pcg64si_advance_r pcg_oneseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_128 pcg128si_random_t; +#define pcg128si_random_r pcg_oneseq_128_rxs_m_xs_128_random_r +#define pcg128si_boundedrand_r pcg_oneseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128si_srandom_r pcg_oneseq_128_srandom_r +#define pcg128si_advance_r pcg_oneseq_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_setseq_8 pcg8i_random_t; +typedef struct pcg_state_setseq_16 pcg16i_random_t; +typedef struct pcg_state_setseq_32 pcg32i_random_t; +typedef struct pcg_state_setseq_64 pcg64i_random_t; +//// random_r +#define pcg8i_random_r pcg_setseq_8_rxs_m_xs_8_random_r +#define pcg16i_random_r pcg_setseq_16_rxs_m_xs_16_random_r +#define pcg32i_random_r pcg_setseq_32_rxs_m_xs_32_random_r +#define pcg64i_random_r pcg_setseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8i_boundedrand_r pcg_setseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16i_boundedrand_r pcg_setseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32i_boundedrand_r pcg_setseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64i_boundedrand_r pcg_setseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8i_srandom_r pcg_setseq_8_srandom_r +#define pcg16i_srandom_r pcg_setseq_16_srandom_r +#define pcg32i_srandom_r pcg_setseq_32_srandom_r +#define pcg64i_srandom_r pcg_setseq_64_srandom_r +//// advance_r +#define pcg8i_advance_r pcg_setseq_8_advance_r +#define pcg16i_advance_r pcg_setseq_16_advance_r +#define pcg32i_advance_r pcg_setseq_32_advance_r +#define pcg64i_advance_r pcg_setseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_setseq_128 pcg128i_random_t; +#define pcg128i_random_r pcg_setseq_128_rxs_m_xs_128_random_r +#define 
pcg128i_boundedrand_r pcg_setseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128i_srandom_r pcg_setseq_128_srandom_r +#define pcg128i_advance_r pcg_setseq_128_advance_r +#endif + +extern uint32_t pcg32_random(); +extern uint32_t pcg32_boundedrand(uint32_t bound); +extern void pcg32_srandom(uint64_t seed, uint64_t seq); +extern void pcg32_advance(uint64_t delta); + +#if PCG_HAS_128BIT_OPS +extern uint64_t pcg64_random(); +extern uint64_t pcg64_boundedrand(uint64_t bound); +extern void pcg64_srandom(pcg128_t seed, pcg128_t seq); +extern void pcg64_advance(pcg128_t delta); +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG32_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#define PCG32U_INITIALIZER PCG_STATE_UNIQUE_64_INITIALIZER +#define PCG32S_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#define PCG32F_INITIALIZER PCG_STATE_MCG_64_INITIALIZER + +#if PCG_HAS_128BIT_OPS +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#define PCG64U_INITIALIZER PCG_STATE_UNIQUE_128_INITIALIZER +#define PCG64S_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#define PCG64F_INITIALIZER PCG_STATE_MCG_128_INITIALIZER +#endif + +#define PCG8SI_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG16SI_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG32SI_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG64SI_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128SI_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG8I_INITIALIZER PCG_STATE_SETSEQ_8_INITIALIZER +#define PCG16I_INITIALIZER PCG_STATE_SETSEQ_16_INITIALIZER +#define PCG32I_INITIALIZER PCG_STATE_SETSEQ_32_INITIALIZER +#define PCG64I_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128I_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#endif + +#if __cplusplus +} +#endif + +#endif // PCG_VARIANTS_H_INCLUDED diff --git a/numpy/random/src/philox/LICENSE.md 
b/numpy/random/src/philox/LICENSE.md new file mode 100644 index 000000000..4a9f6bb29 --- /dev/null +++ b/numpy/random/src/philox/LICENSE.md @@ -0,0 +1,31 @@ +# PHILOX + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
diff --git a/numpy/random/src/philox/philox-benchmark.c b/numpy/random/src/philox/philox-benchmark.c new file mode 100644 index 000000000..0cab04cf5 --- /dev/null +++ b/numpy/random/src/philox/philox-benchmark.c @@ -0,0 +1,38 @@ +/* + * Simple benchmark command + * + * cl philox-benchmark.c /Ox + * + * gcc philox-benchmark.c -O3 -o philox-benchmark + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). + */ +#include "Random123/philox.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 /* total number of 64-bit outputs summed by main */ + +/* Benchmark driver: makes N / 4 calls to philox4x64_R (four outputs per call), sums every output word, then prints the sum, the output count and a throughput figure. */ int main() { + philox4x64_ctr_t ctr = {{0, 0, 0, 0}}; + philox4x64_key_t key = {{0, 0xDEADBEAF}}; + philox4x64_ctr_t out; + uint64_t count = 0, sum = 0; /* sum is printed so the generated values are actually used */ + int i, j; + clock_t begin = clock(); + for (i = 0; i < N / 4UL; i++) { /* one counter value per iteration */ + ctr.v[0]++; /* only the lowest counter word is ever changed here */ + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + sum += out.v[j]; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; /* clock ticks converted to seconds */ + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + /* integer divide-then-multiply truncates the rate down to a multiple of 1000000 */ printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/philox/philox-test-data-gen.c b/numpy/random/src/philox/philox-test-data-gen.c new file mode 100644 index 000000000..442e18b55 --- /dev/null +++ b/numpy/random/src/philox/philox-test-data-gen.c @@ -0,0 +1,82 @@ +/* + * Generate testing csv files + * + * cl philox-test-data-gen.c /Ox + * philox-test-data-gen.exe + * + * gcc philox-test-data-gen.c -o philox-test-data-gen + * ./philox-test-data-gen + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). 
+ * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/philox.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 /* number of outputs written to each CSV file */ + +/* Writes two CSV files of N Philox outputs each: philox-testset-1.csv with the key derived from seed 0xDEADBEAF and philox-testset-2.csv with the key derived from seed 0. Returns -1 if a file cannot be opened. */ int main() { + philox4x64_ctr_t ctr = {{0, 0, 0, 0}}; + philox4x64_key_t key = {{0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + philox4x64_ctr_t out; + uint64_t store[N]; + state = seed; + int i, j; + for (i = 0; i < 2; i++) { /* both key words come from splitmix64_next started at seed */ + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4UL; i++) { /* each counter value yields four outputs */ + ctr.v[0]++; + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("philox-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); /* first row records the seed */ + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { /* echo the entry with index 999 to stdout */ + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; /* restart the lowest counter word for the second data set */ + state = seed = 0; /* second data set is keyed from seed 0 */ + for (i = 0; i < 2; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("philox-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/philox/philox.c b/numpy/random/src/philox/philox.c new file mode 100644 index 000000000..3382c60d6 --- /dev/null +++ b/numpy/random/src/philox/philox.c @@ -0,0 +1,29 @@ +#include "philox.h" + +/* philox_next64 and philox_next32 are defined static INLINE in philox.h; the two lines below only redeclare them. */ extern INLINE uint64_t philox_next64(philox_state *state); + +extern INLINE uint32_t philox_next32(philox_state *state); + +/* Adds one to counter word v[2] and carries into v[3] on wrap-around; v[0], v[1], the key and the output buffer are not modified. */ extern void philox_jump(philox_state *state) { + /* Advances state as-if 2^128 draws were made */ /* NOTE(review): the counter moves by 2^128 steps, and philox_next produces four 64-bit values per counter step, so this figure may be counting counter steps rather than individual draws - confirm. */ + state->ctr->v[2]++; + if 
(state->ctr->v[2] == 0) { /* v[2] wrapped around, so carry into the top word */ + state->ctr->v[3]++; + } +} + +/* Adds the four-word value step[0..3] (step[0] is the least significant word) to the counter words v[0..3], propagating carries from each word into the next. A carry out of v[3] is discarded. The output buffer and buffer_pos are not modified. */ extern void philox_advance(uint64_t *step, philox_state *state) { + int i, carry = 0; + uint64_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { /* apply the carry produced by the previous word */ + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; /* the increment itself wrapped, so keep carrying */ + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { /* a smaller result after an unsigned add means it overflowed */ + carry = 1; + } + } +} diff --git a/numpy/random/src/philox/philox.h b/numpy/random/src/philox/philox.h new file mode 100644 index 000000000..411404b55 --- /dev/null +++ b/numpy/random/src/philox/philox.h @@ -0,0 +1,253 @@ +#ifndef _RANDOMDGEN__PHILOX_H_ +#define _RANDOMDGEN__PHILOX_H_ + +#include <inttypes.h> + +#ifdef _WIN32 +#define INLINE __inline __forceinline +#else +#define INLINE inline +#endif + +#define PHILOX_BUFFER_SIZE 4L /* number of uint64_t slots in philox_state.buffer */ + +struct r123array2x64 { + uint64_t v[2]; +}; +struct r123array4x64 { + uint64_t v[4]; +}; + +enum r123_enum_philox4x64 { philox4x64_rounds = 10 }; +typedef struct r123array4x64 philox4x64_ctr_t; +typedef struct r123array2x64 philox4x64_key_t; +typedef struct r123array2x64 philox4x64_ukey_t; + +/* Returns a copy of key with a fixed constant added to each of its two words; philox4x64_R calls this between rounds. */ static INLINE struct r123array2x64 +_philox4x64bumpkey(struct r123array2x64 key) { + key.v[0] += (0x9E3779B97F4A7C15ULL); + key.v[1] += (0xBB67AE8584CAA73BULL); + return key; +} + +#ifdef _WIN32 +#include <intrin.h> +/* TODO: This isn't correct for many platforms */ +#ifdef _WIN64 +#pragma intrinsic(_umul128) +#else +#pragma intrinsic(__emulu) +/* Replacement for _umul128 when _WIN64 is not defined: splits both operands into 32-bit halves and combines the four partial products from __emulu. Returns the low 64 bits of a * b and stores the high 64 bits through high. */ static INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) { + + uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid, + carry_bit; + a_lo = (uint32_t)a; + a_hi = a >> 32; + b_lo = (uint32_t)b; + b_hi = b >> 32; + + a_x_b_hi = __emulu(a_hi, b_hi); + a_x_b_mid = __emulu(a_hi, b_lo); + b_x_a_mid = __emulu(b_hi, a_lo); + a_x_b_lo = __emulu(a_lo, b_lo); + + /* carry out of bit 63: low halves of the two middle products plus the top half of the low product */ carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid + + (a_x_b_lo >> 32)) >> + 32; + + *high = a_x_b_hi + 
(a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32); +} +#endif +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + return _umul128(a, b, hip); +} +#else +#if __SIZEOF_INT128__ +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + __uint128_t product = ((__uint128_t)a) * ((__uint128_t)b); + *hip = product >> 64; + return (uint64_t)product; +} +#else +static INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) { + + uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid, + carry_bit; + a_lo = (uint32_t)a; + a_hi = a >> 32; + b_lo = (uint32_t)b; + b_hi = b >> 32; + + a_x_b_hi = a_hi * b_hi; + a_x_b_mid = a_hi * b_lo; + b_x_a_mid = b_hi * a_lo; + a_x_b_lo = a_lo * b_lo; + + carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid + + (a_x_b_lo >> 32)) >> + 32; + + *high = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32); +} +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + return _umul128(a, b, hip); +} +#endif +#endif + +static INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr, + struct r123array2x64 key); + +static INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr, + struct r123array2x64 key) { + uint64_t hi0; + uint64_t hi1; + uint64_t lo0 = mulhilo64((0xD2E7470EE14C6C93ULL), ctr.v[0], &hi0); + uint64_t lo1 = mulhilo64((0xCA5A826395121157ULL), ctr.v[2], &hi1); + struct r123array4x64 out = { + {hi1 ^ ctr.v[1] ^ key.v[0], lo1, hi0 ^ ctr.v[3] ^ key.v[1], lo0}}; + return out; +} + +static INLINE philox4x64_key_t philox4x64keyinit(philox4x64_ukey_t uk) { + return uk; +} +static INLINE philox4x64_ctr_t philox4x64_R(unsigned int R, + philox4x64_ctr_t ctr, + philox4x64_key_t key); + +static INLINE philox4x64_ctr_t philox4x64_R(unsigned int R, + philox4x64_ctr_t ctr, + philox4x64_key_t key) { + 
if (R > 0) { + ctr = _philox4x64round(ctr, key); + } + if (R > 1) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 2) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 3) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 4) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 5) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 6) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 7) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 8) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 9) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 10) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 11) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 12) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 13) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 14) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 15) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + return ctr; +} + +typedef struct s_philox_state { + philox4x64_ctr_t *ctr; + philox4x64_key_t *key; + int buffer_pos; + uint64_t buffer[PHILOX_BUFFER_SIZE]; + int has_uint32; + uint32_t uinteger; +} philox_state; + +static INLINE uint64_t philox_next(philox_state *state) { + uint64_t out; + int i; + philox4x64_ctr_t ct; + + if (state->buffer_pos < PHILOX_BUFFER_SIZE) { + out = state->buffer[state->buffer_pos]; + state->buffer_pos++; + return out; + } + /* generate 4 new uint64_t */ + state->ctr->v[0]++; + /* Handle carry */ + if (state->ctr->v[0] == 0) { + state->ctr->v[1]++; + if (state->ctr->v[1] == 0) { + 
state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } + } + } + /* run the rounds on the updated counter and cache all four output words */ ct = philox4x64_R(philox4x64_rounds, *state->ctr, *state->key); + for (i = 0; i < 4; i++) { + state->buffer[i] = ct.v[i]; + } + state->buffer_pos = 1; /* slot 0 is returned right now, so the next read starts at slot 1 */ + return state->buffer[0]; +} + +/* Returns the next 64-bit value; a direct wrapper around philox_next. */ static INLINE uint64_t philox_next64(philox_state *state) { + return philox_next(state); +} + +/* Returns the next 32-bit value. Each 64-bit draw from philox_next serves two calls: the low 32 bits are returned first and the high 32 bits are cached in state->uinteger for the following call. */ static INLINE uint32_t philox_next32(philox_state *state) { + uint64_t next; + + if (state->has_uint32) { /* a cached upper half is waiting: hand it out and clear the flag */ + state->has_uint32 = 0; + return state->uinteger; + } + next = philox_next(state); + + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); /* keep the upper half for the next call */ + return (uint32_t)(next & 0xffffffff); +} + +/* Both functions below are defined in philox.c. */ extern void philox_jump(philox_state *state); + +extern void philox_advance(uint64_t *step, philox_state *state); + +#endif diff --git a/numpy/random/src/splitmix64/LICENSE.md b/numpy/random/src/splitmix64/LICENSE.md new file mode 100644 index 000000000..3c4d73b92 --- /dev/null +++ b/numpy/random/src/splitmix64/LICENSE.md @@ -0,0 +1,9 @@ +# SPLITMIX64 + +Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/splitmix64/splitmix64.c b/numpy/random/src/splitmix64/splitmix64.c new file mode 100644 index 000000000..79a845982 --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.c @@ -0,0 +1,29 @@ +/* Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. + +Modified 2018 by Kevin Sheppard. Modifications licensed under the NCSA +license. +*/ + +/* This is a fixed-increment version of Java 8's SplittableRandom generator + See http://dx.doi.org/10.1145/2714064.2660195 and + http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html + + It is a very fast generator passing BigCrush, and it can be useful if + for some reason you absolutely want 64 bits of state; otherwise, we + rather suggest to use a xoroshiro128+ (for moderately parallel + computations) or xorshift1024* (for massively parallel computations) + generator. 
*/ + +#include "splitmix64.h" + +/* The three functions are defined static inline in splitmix64.h; the lines below only redeclare them. */ extern inline uint64_t splitmix64_next(uint64_t *state); + +extern inline uint64_t splitmix64_next64(splitmix64_state *state); + +extern inline uint32_t splitmix64_next32(splitmix64_state *state); diff --git a/numpy/random/src/splitmix64/splitmix64.h b/numpy/random/src/splitmix64/splitmix64.h new file mode 100644 index 000000000..880132970 --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.h @@ -0,0 +1,39 @@ +/* NOTE(review): this header has no include guard; including it twice in one translation unit would redefine the struct and the static inline functions below - consider adding a guard. */ #ifdef _WIN32 +#if _MSC_VER == 1500 /* this compiler version uses the bundled inttypes.h and maps inline to __forceinline */ +#include "../common/inttypes.h" +#define inline __forceinline +#else +#include <inttypes.h> +#endif +#else +#include <inttypes.h> +#endif + +/* Generator state plus a one-slot cache used by splitmix64_next32. */ typedef struct s_splitmix64_state { + uint64_t state; /* 64-bit state advanced by splitmix64_next */ + int has_uint32; /* nonzero while uinteger holds a 32-bit half not yet returned */ + uint32_t uinteger; /* upper 32 bits saved from the last 64-bit draw */ +} splitmix64_state; + +/* Adds the fixed increment 0x9e3779b97f4a7c15 to *state, then returns a scrambled copy of the new state (two xor-shift-multiply steps followed by a final xor-shift). */ static inline uint64_t splitmix64_next(uint64_t *state) { + uint64_t z = (state[0] += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + +/* Returns the next 64-bit value, advancing state->state through splitmix64_next. */ static inline uint64_t splitmix64_next64(splitmix64_state *state) { + return splitmix64_next(&state->state); +} + +/* Returns the next 32-bit value. Each 64-bit draw serves two calls: the low 32 bits are returned first and the high 32 bits are cached in state->uinteger for the following call. */ static inline uint32_t splitmix64_next32(splitmix64_state *state) { + uint64_t next; + if (state->has_uint32) { /* a cached upper half is waiting: hand it out and clear the flag */ + state->has_uint32 = 0; + return state->uinteger; + } + next = splitmix64_next64(state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); /* keep the upper half for the next call */ + return (uint32_t)(next & 0xffffffff); +} diff --git a/numpy/random/src/splitmix64/splitmix64.orig.c b/numpy/random/src/splitmix64/splitmix64.orig.c new file mode 100644 index 000000000..df6133aab --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.orig.c @@ -0,0 +1,28 @@ +/* Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. 
*/ + +#include <stdint.h> + +/* This is a fixed-increment version of Java 8's SplittableRandom generator + See http://dx.doi.org/10.1145/2714064.2660195 and + http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html + + It is a very fast generator passing BigCrush, and it can be useful if + for some reason you absolutely want 64 bits of state; otherwise, we + rather suggest to use a xoroshiro128+ (for moderately parallel + computations) or xorshift1024* (for massively parallel computations) + generator. */ + +uint64_t x; /* The state can be seeded with any value. */ + +uint64_t next() { + uint64_t z = (x += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} diff --git a/numpy/random/src/threefry/LICENSE.md b/numpy/random/src/threefry/LICENSE.md new file mode 100644 index 000000000..4a9f6bb29 --- /dev/null +++ b/numpy/random/src/threefry/LICENSE.md @@ -0,0 +1,31 @@ +# THREEFRY + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/threefry/threefry-benchmark.c b/numpy/random/src/threefry/threefry-benchmark.c new file mode 100644 index 000000000..6d6239cd3 --- /dev/null +++ b/numpy/random/src/threefry/threefry-benchmark.c @@ -0,0 +1,38 @@ +/* + * Simple benchmark command + * + * cl threefry-benchmark.c /Ox + * + * gcc threefry-benchmark.c -O3 -o threefry-benchmark + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). 
+ */ +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { /* Times N / 4 calls of threefry4x64_R (four 64-bit outputs per call), then prints the sum of the outputs, the number of outputs and a rate truncated to a multiple of one million. */ + threefry4x64_ctr_t ctr = {{0, 0, 0, 0}}; /* counter block, incremented before every call */ + threefry4x64_key_t key = {{0xDEADBEAF, 0, 0, 0}}; /* fixed key */ + threefry4x64_ctr_t out; + uint64_t count = 0, sum = 0; + int i, j; + clock_t begin = clock(); + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + sum += out.v[j]; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/threefry/threefry-orig.c b/numpy/random/src/threefry/threefry-orig.c new file mode 100644 index 000000000..d27cfd797 --- /dev/null +++ b/numpy/random/src/threefry/threefry-orig.c @@ -0,0 +1,83 @@ +/* +Copyright (c) 2017, Pierre de Buyl + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "threefry.h" +#include <stdint.h> + +#define N_WORDS 2 +#define KEY_LENGTH 3 +#define C240 0x1BD11BDAA9FC1A22 +#define N_ROUNDS 20 +#define MASK 0xffffffffffffffff +#define DOUBLE_MULT 5.421010862427522e-20 + +static const int ROTATION[] = {16, 42, 12, 31, 16, 32, 24, 21}; + +uint64_t rotl_64(uint64_t x, int d) { return ((x << d) | (x >> (64 - d))); } + +threefry_t mix(threefry_t x, int R) { + x.c0 += x.c1; + x.c1 = rotl_64(x.c1, R) ^ x.c0; + return x; +} + +threefry_t threefry(threefry_t p, threefry_t k) { + uint64_t K[] = {k.c0, k.c1, C240 ^ k.c0 ^ k.c1}; + int rmod4, rdiv4; + threefry_t x; + x = p; + for (int r = 0; r < N_ROUNDS; r++) { + rmod4 = r % 4; + if (rmod4 == 0) { + rdiv4 = r / 4; + x.c0 += K[rdiv4 % KEY_LENGTH]; + x.c1 += K[(rdiv4 + 1) % KEY_LENGTH] + rdiv4; + } + x = mix(x, ROTATION[r % 8]); + } + x.c0 += K[(N_ROUNDS / 4) % KEY_LENGTH]; + x.c1 += K[(N_ROUNDS / 4 + 1) % KEY_LENGTH] + N_ROUNDS / 4; + return x; +} + +uint64_t threefry_uint64(threefry_t *c, threefry_t *k) { + threefry_t x; + x = threefry(*c, *k); + c->c0++; + return x.c0; +} + +double threefry_double(threefry_t *c, threefry_t *k) { + threefry_t x; + x = threefry(*c, *k); + c->c0++; + return x.c0 * DOUBLE_MULT; +} diff --git a/numpy/random/src/threefry/threefry-test-data-gen.c b/numpy/random/src/threefry/threefry-test-data-gen.c new file mode 100644 index 000000000..328eb2575 --- /dev/null +++ b/numpy/random/src/threefry/threefry-test-data-gen.c @@ -0,0 +1,83 @@ 
+/* + * Generate testing csv files + * + * cl threefry-test-data-gen.c /Ox ../splitmix64/splitmix64.c /Ox + * threefry-test-data-gen.exe + * + * gcc threefry-test-data-gen.c ../splitmix64/splitmix64.c -O3 -o + * threefry-test-data-gen + * ./threefry-test-data-gen + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { /* Writes two CSV files of N threefry4x64 outputs each: the key is filled from splitmix64 seeded with 0xDEADBEAF for the first file and with 0 for the second. Returns -1 if a file cannot be opened. */ + threefry4x64_ctr_t ctr = {{0, 0, 0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + threefry4x64_key_t key = {{0}}; + threefry4x64_ctr_t out; + uint64_t store[N]; + int i, j; + for (i = 0; i < 4; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("threefry-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == N - 1) { /* echo the last value to stdout */ + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; + state = seed = 0; + for (i = 0; i < 4; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("threefry-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == N - 1) { /* echo the last value to stdout */ + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/threefry/threefry.c 
b/numpy/random/src/threefry/threefry.c new file mode 100644 index 000000000..19c37df1b --- /dev/null +++ b/numpy/random/src/threefry/threefry.c @@ -0,0 +1,29 @@ +#include "threefry.h" + +extern INLINE uint64_t threefry_next64(threefry_state *state); + +extern INLINE uint32_t threefry_next32(threefry_state *state); + +extern void threefry_jump(threefry_state *state) { + /* Advances state as-if 2^128 draws were made: increments counter word 2 and carries into word 3 when word 2 wraps to zero. Only the counter is modified here. */ + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } +} + +extern void threefry_advance(uint64_t *step, threefry_state *state) { /* Adds the four-word value step[0..3] (step[0] least significant) to counter words v[0..3], carrying from each word into the next; a carry out of word 3 is discarded. Only the counter is modified here. */ + int i, carry = 0; + uint64_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { /* first fold in the carry from the previous word */ + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { /* unsigned wrap-around means the addition overflowed */ + carry = 1; + } + } +} diff --git a/numpy/random/src/threefry/threefry.h b/numpy/random/src/threefry/threefry.h new file mode 100644 index 000000000..297c1241a --- /dev/null +++ b/numpy/random/src/threefry/threefry.h @@ -0,0 +1,341 @@ +/* +Adapted from random123's threefry.h +*/ +#ifndef _RANDOMDGEN__THREEFRY_H_ +#define _RANDOMDGEN__THREEFRY_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +#define THREEFRY_BUFFER_SIZE 4L + +enum r123_enum_threefry64x4 { + /* These are the R_256 constants from the Threefish reference sources + with names changed to R_64x4... 
*/ + R_64x4_0_0 = 14, + R_64x4_0_1 = 16, + R_64x4_1_0 = 52, + R_64x4_1_1 = 57, + R_64x4_2_0 = 23, + R_64x4_2_1 = 40, + R_64x4_3_0 = 5, + R_64x4_3_1 = 37, + R_64x4_4_0 = 25, + R_64x4_4_1 = 33, + R_64x4_5_0 = 46, + R_64x4_5_1 = 12, + R_64x4_6_0 = 58, + R_64x4_6_1 = 22, + R_64x4_7_0 = 32, + R_64x4_7_1 = 32 +}; + +struct r123array4x64 { + uint64_t v[4]; +}; /* r123array4x64 */ + +typedef struct r123array4x64 threefry4x64_key_t; +typedef struct r123array4x64 threefry4x64_ctr_t; + +static INLINE uint64_t RotL_64(uint64_t x, unsigned int N); +static INLINE uint64_t RotL_64(uint64_t x, unsigned int N) { /* Rotates x left by N bits; both shift counts are masked with 63. */ + return (x << (N & 63)) | (x >> ((64 - N) & 63)); +} + +static INLINE threefry4x64_ctr_t threefry4x64_R(unsigned int Nrounds, + threefry4x64_ctr_t in, + threefry4x64_key_t k); +static INLINE threefry4x64_ctr_t threefry4x64_R(unsigned int Nrounds, + threefry4x64_ctr_t in, + threefry4x64_key_t k) { /* Mixes the four-word block `in` under key `k` and returns the result. ks[0..3] hold the key words and ks[4] is the constant 0x1BD11BDAA9FC1A22 xor-ed with all key words. Each `if (Nrounds > r)` block below is one unrolled round; after every fourth round a rotated window of ks plus an increasing small integer is added. */ + threefry4x64_ctr_t X; + uint64_t ks[4 + 1]; + int i; + ks[4] = ((0xA9FC1A22) + (((uint64_t)(0x1BD11BDA)) << 32)); + for (i = 0; i < 4; i++) { + ks[i] = k.v[i]; + X.v[i] = in.v[i]; + ks[4] ^= k.v[i]; + } + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + if (Nrounds > 0) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 1) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 2) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += 
ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 1; + } + if (Nrounds > 4) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 5) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 6) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 2; + } + if (Nrounds > 8) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 9) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 10) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 3; + } + if (Nrounds > 12) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 13) { + X.v[0] += X.v[3]; 
+ X.v[3] = RotL_64(X.v[3], R_64x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 14) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 4; + } + if (Nrounds > 16) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 17) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 18) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 5; + } + /* Maximum of 20 rounds. The block below is one further round that only runs when Nrounds > 20; no later rounds or key additions follow it, so any larger Nrounds gives the same result as 21. */ + if (Nrounds > 20) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + return X; +} +enum r123_enum_threefry4x64 { threefry4x64_rounds = 20 }; + +typedef struct s_threefry_state { + threefry4x64_key_t *ctr; /* NOTE(review): the counter is declared with the key typedef and the key below with the counter typedef; both alias struct r123array4x64, so this compiles either way. */ + threefry4x64_ctr_t *key; + int buffer_pos; /* index of the next unread entry in buffer */ + uint64_t buffer[THREEFRY_BUFFER_SIZE]; + int has_uint32; /* nonzero when uinteger holds a cached 32-bit value */ + uint32_t uinteger; +} threefry_state; 
+ +static INLINE uint64_t threefry_next(threefry_state *state) { /* Returns the next buffered 64-bit value; when the buffer is used up, increments the four-word counter with carry, calls threefry4x64_R to refill all four buffer entries and returns the first one. */ + int i; + threefry4x64_ctr_t ct; + uint64_t out; + if (state->buffer_pos < THREEFRY_BUFFER_SIZE) { + out = state->buffer[state->buffer_pos]; + state->buffer_pos++; + return out; + } + /* generate 4 new uint64_t */ + state->ctr->v[0]++; + /* Handle carry */ + if (state->ctr->v[0] == 0) { + state->ctr->v[1]++; + if (state->ctr->v[1] == 0) { + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } + } + } + ct = threefry4x64_R(threefry4x64_rounds, *state->ctr, *state->key); + for (i = 0; i < 4; i++) { + state->buffer[i] = ct.v[i]; + } + state->buffer_pos = 1; /* entry 0 is returned right away, so the next read starts at 1 */ + return state->buffer[0]; +} + +static INLINE uint64_t threefry_next64(threefry_state *state) { /* Thin wrapper around threefry_next. */ + return threefry_next(state); +} + +static INLINE uint32_t threefry_next32(threefry_state *state) { /* Splits one 64-bit draw into two 32-bit results: the low half is returned first and the high half is cached in state->uinteger for the following call. */ + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = threefry_next(state); + + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +extern void threefry_jump(threefry_state *state); + +extern void threefry_advance(uint64_t *step, threefry_state *state); + +#endif diff --git a/numpy/random/src/threefry32/LICENSE.md b/numpy/random/src/threefry32/LICENSE.md new file mode 100644 index 000000000..591cd75f4 --- /dev/null +++ b/numpy/random/src/threefry32/LICENSE.md @@ -0,0 +1,31 @@ +# THREEFRY32 + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. 
+ +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/threefry32/threefry32-test-data-gen.c b/numpy/random/src/threefry32/threefry32-test-data-gen.c new file mode 100644 index 000000000..0e6229995 --- /dev/null +++ b/numpy/random/src/threefry32/threefry32-test-data-gen.c @@ -0,0 +1,88 @@ +/* + * Generate testing csv files + * + * cl threefry32-test-data-gen.c /Ox ../splitmix64/splitmix64.c /Ox + * threefry32-test-data-gen.exe + * + * gcc threefry32-test-data-gen.c ../splitmix64/splitmix64.c -O3 -o + * threefry32-test-data-gen + * ./threefry32-test-data-gen + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). 
+ * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { /* Writes two CSV files of N threefry4x32 outputs each: the key is filled from splitmix64 seeded with 0xDEADBEAF for the first file and with 0 for the second. Returns -1 if a file cannot be opened. */ + threefry4x32_ctr_t ctr = {{0, 0, 0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + threefry4x32_key_t key = {{0}}; + threefry4x32_ctr_t out; + uint64_t store[N]; + uint64_t seed_val; + int i, j; + for (i = 0; i < 2; i++) { /* each 64-bit draw fills two 32-bit key words, so two draws fill the four-word key; iterating further would write past key.v[3] */ + seed_val = splitmix64_next(&state); + key.v[2*i] = (uint32_t)seed_val; + key.v[2*i+1] = (uint32_t)(seed_val >> 32); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x32_R(threefry4x32_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("threefry32-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == N - 1) { /* echo the last value to stdout */ + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; + state = seed = 0; + for (i = 0; i < 2; i++) { /* two 64-bit draws fill the four 32-bit key words */ + seed_val = splitmix64_next(&state); + key.v[2*i] = (uint32_t)seed_val; + key.v[2*i+1] = (uint32_t)(seed_val >> 32); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x32_R(threefry4x32_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("threefry32-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == N - 1) { /* echo the last value to stdout */ + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/threefry32/threefry32.c b/numpy/random/src/threefry32/threefry32.c new file mode 100644 index 000000000..500e9482d --- /dev/null +++ b/numpy/random/src/threefry32/threefry32.c @@ -0,0 +1,29 @@ +#include "threefry32.h" + +extern INLINE uint64_t 
threefry32_next64(threefry32_state *state); + +extern INLINE uint32_t threefry32_next32(threefry32_state *state); + +extern void threefry32_jump(threefry32_state *state) { + /* Advances state as-if 2^64 draws were made: increments counter word 2 and carries into word 3 when word 2 wraps to zero. Only the counter is modified here. */ + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } +} + +extern void threefry32_advance(uint32_t *step, threefry32_state *state) { /* Adds the four-word value step[0..3] (step[0] least significant) to counter words v[0..3], carrying from each word into the next; a carry out of word 3 is discarded. Only the counter is modified here. */ + int i, carry = 0; + uint32_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { /* first fold in the carry from the previous word */ + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { /* unsigned wrap-around means the addition overflowed */ + carry = 1; + } + } +} diff --git a/numpy/random/src/threefry32/threefry32.h b/numpy/random/src/threefry32/threefry32.h new file mode 100644 index 000000000..74a85c42b --- /dev/null +++ b/numpy/random/src/threefry32/threefry32.h @@ -0,0 +1,842 @@ +/* +Adapted from random123's threefry.h +*/ +#ifndef _RANDOMDGEN__THREEFRY32_H_ +#define _RANDOMDGEN__THREEFRY32_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +#define THREEFRY_BUFFER_SIZE 4L + +static INLINE uint32_t RotL_32(uint32_t x, unsigned int N); +static INLINE uint32_t RotL_32(uint32_t x, unsigned int N) { /* Rotates x left by N bits; both shift counts are masked with 31. */ + return (x << (N & 31)) | (x >> ((32 - N) & 31)); +} + +struct r123array4x32 { + uint32_t v[4]; +}; + +enum r123_enum_threefry32x4 { /* Rotation amounts passed to RotL_32 by the round function. */ + + R_32x4_0_0 = 10, + R_32x4_0_1 = 26, + R_32x4_1_0 = 11, + R_32x4_1_1 = 21, + R_32x4_2_0 = 13, + R_32x4_2_1 = 27, + R_32x4_3_0 = 23, + R_32x4_3_1 = 5, + R_32x4_4_0 = 6, + R_32x4_4_1 = 20, + R_32x4_5_0 = 17, + R_32x4_5_1 = 11, + R_32x4_6_0 = 25, + R_32x4_6_1 = 10, + R_32x4_7_0 = 18, + R_32x4_7_1 = 20 + +}; + +typedef struct r123array4x32 threefry4x32_ctr_t; +typedef struct r123array4x32 threefry4x32_key_t; +typedef struct r123array4x32 
threefry4x32_ukey_t; +static INLINE threefry4x32_key_t threefry4x32keyinit(threefry4x32_ukey_t uk) { + return uk; +}; +static INLINE threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, + threefry4x32_ctr_t in, + threefry4x32_key_t k); +static INLINE threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, + threefry4x32_ctr_t in, + threefry4x32_key_t k) { + threefry4x32_ctr_t X; + uint32_t ks[4 + 1]; + int i; + ks[4] = 0x1BD11BDA; + for (i = 0; i < 4; i++) { + ks[i] = k.v[i]; + X.v[i] = in.v[i]; + ks[4] ^= k.v[i]; + } + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + if (Nrounds > 0) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 1) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 2) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 1; + } + if (Nrounds > 4) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 5) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 6) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + 
X.v[3] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 2; + } + if (Nrounds > 8) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 9) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 10) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 3; + } + if (Nrounds > 12) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 13) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 14) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + 
X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 4; + } + if (Nrounds > 16) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 17) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 18) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 5; + } + if (Nrounds > 20) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 21) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 22) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 23) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 23) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 6; + } + if (Nrounds > 24) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 25) { + 
X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 26) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 27) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 27) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 7; + } + if (Nrounds > 28) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 29) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 30) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 31) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 31) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 8; + } + if (Nrounds > 32) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 33) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 34) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + 
X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 35) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 35) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 9; + } + if (Nrounds > 36) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 37) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 38) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 39) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 39) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 10; + } + if (Nrounds > 40) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 41) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 42) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 43) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } 
+ if (Nrounds > 43) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 11; + } + if (Nrounds > 44) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 45) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 46) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 47) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 47) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 12; + } + if (Nrounds > 48) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 49) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 50) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 51) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 51) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 13; + } + if (Nrounds > 52) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = 
RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 53) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 54) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 55) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 55) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 14; + } + if (Nrounds > 56) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 57) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 58) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 59) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 59) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 15; + } + if (Nrounds > 60) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 61) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 62) { + 
X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 63) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 63) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 16; + } + if (Nrounds > 64) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 65) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 66) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 67) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 67) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 17; + } + if (Nrounds > 68) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 69) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 70) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 71) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + 
X.v[2] += X.v[1];
+    X.v[1] = RotL_32(X.v[1], R_32x4_7_1);
+    X.v[1] ^= X.v[2];
+  }
+  if (Nrounds > 71) {
+    X.v[0] += ks[3];
+    X.v[1] += ks[4];
+    X.v[2] += ks[0];
+    X.v[3] += ks[1];
+    X.v[4 - 1] += 18;
+  }
+  return X;
+}
+enum r123_enum_threefry4x32 { threefry4x32_rounds = 20 };
+static INLINE threefry4x32_ctr_t threefry4x32(threefry4x32_ctr_t in,
+                                              threefry4x32_key_t k);
+static INLINE threefry4x32_ctr_t threefry4x32(threefry4x32_ctr_t in,
+                                              threefry4x32_key_t k) {
+  return threefry4x32_R(threefry4x32_rounds, in, k);
+}
+
+/*
+ * Buffered state of the 32-bit Threefry generator.
+ *
+ *   ctr        - counter block; incremented once per refill of `buffer`
+ *   key        - key passed unchanged to threefry4x32_R()
+ *   buffer_pos - index of the next unread word in `buffer`
+ *   buffer     - output words of the most recent threefry4x32_R() call
+ *
+ * The field types were previously declared the other way round (`ctr` as a
+ * key_t pointer, `key` as a ctr_t pointer). threefry32_next() already passes
+ * *ctr where a threefry4x32_ctr_t is expected and *key where a
+ * threefry4x32_key_t is expected, so the declarations now match that use.
+ */
+typedef struct s_threefry32_state {
+  threefry4x32_ctr_t *ctr;
+  threefry4x32_key_t *key;
+  int buffer_pos;
+  uint32_t buffer[THREEFRY_BUFFER_SIZE];
+} threefry32_state;
+
+/*
+ * Return the next 32-bit output word.
+ *
+ * Words are served from state->buffer until it is exhausted; then the counter
+ * is advanced by one, a new block is generated and its first word returned.
+ */
+static INLINE uint32_t threefry32_next(threefry32_state *state) {
+  int i;
+  threefry4x32_ctr_t ct;
+  uint32_t out;
+  if (state->buffer_pos < THREEFRY_BUFFER_SIZE) {
+    out = state->buffer[state->buffer_pos];
+    state->buffer_pos++;
+    return out;
+  }
+  /* generate 4 new uint32_t */
+  state->ctr->v[0]++;
+  /* Handle carry: propagate the wrap-around of each word into the next */
+  if (state->ctr->v[0] == 0) {
+    state->ctr->v[1]++;
+    if (state->ctr->v[1] == 0) {
+      state->ctr->v[2]++;
+      if (state->ctr->v[2] == 0) {
+        state->ctr->v[3]++;
+      }
+    }
+  }
+  ct = threefry4x32_R(threefry4x32_rounds, *state->ctr, *state->key);
+  /* NOTE(review): copies exactly 4 words, so this presumably relies on
+   * THREEFRY_BUFFER_SIZE being 4 (defined outside this view) - confirm. */
+  for (i = 0; i < 4; i++) {
+    state->buffer[i] = ct.v[i];
+  }
+  /* Word 0 is returned right away, so the next unread word is index 1. */
+  state->buffer_pos = 1;
+  return state->buffer[0];
+}
+
+/*
+ * Return a 64-bit value built from two consecutive 32-bit draws: the first
+ * draw becomes the high word, the second the low word.
+ */
+static INLINE uint64_t threefry32_next64(threefry32_state *state) {
+  /* The two draws are made in separate statements on purpose: the operands
+   * of `|` are evaluated in an unspecified order, so calling
+   * threefry32_next() twice inside one expression would let the compiler
+   * decide which draw ends up in the high word. */
+  const uint64_t high = threefry32_next(state);
+  const uint64_t low = threefry32_next(state);
+  return (high << 32) | low;
+}
+
+/* Return the next 32-bit output word (alias of threefry32_next). */
+static INLINE uint32_t threefry32_next32(threefry32_state *state) {
+  return threefry32_next(state);
+}
+
+/*
+ * Return a double in [0, 1) with 53 random bits: the top 27 bits of one draw
+ * and the top 26 bits of the next, combined as (a * 2^26 + b) / 2^53.
+ */
+static INLINE double threefry32_next_double(threefry32_state *state) {
+  int32_t a = threefry32_next(state) >> 5, b = threefry32_next(state) >> 6;
+  return (a * 67108864.0 + b) / 9007199254740992.0;
+}
+
+extern void threefry32_jump(threefry32_state *state);
+
+extern void threefry32_advance(uint32_t *step, threefry32_state *state);
+
+#endif
diff --git
a/numpy/random/src/xoroshiro128/LICENSE.md b/numpy/random/src/xoroshiro128/LICENSE.md new file mode 100644 index 000000000..969430149 --- /dev/null +++ b/numpy/random/src/xoroshiro128/LICENSE.md @@ -0,0 +1,9 @@ +# XOROSHIRO128 + +Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file
diff --git a/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c b/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c
new file mode 100644
index 000000000..9a7b52bfb
--- /dev/null
+++ b/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c
@@ -0,0 +1,35 @@
+/*
+ * cl xoroshiro128-benchmark.c xoroshiro128plus.orig.c \
+ * ../splitmix64/splitmix64.c /Ox
+ *
+ * gcc -O3 xoroshiro128-benchmark.c xoroshiro128plus.orig.c \
+ * ../splitmix64/splitmix64.c -o xoroshiro128-benchmark
+ *
+ */
+#include "../splitmix64/splitmix64.h"
+#include "xoroshiro128plus.orig.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <time.h>
+
+#define N 1000000000 /* number of next() calls that are timed */
+
+int main()
+{
+    uint64_t count = 0, sum = 0;
+    uint64_t seed = 0xDEADBEAF; /* fixed seed, expanded by splitmix64 below */
+    s[0] = splitmix64_next(&seed);
+    s[1] = splitmix64_next(&seed);
+    int i;
+    clock_t begin = clock();
+    for (i = 0; i < N; i++)
+    {
+        sum += next(); /* the running sum is printed after the loop */
+        count++;
+    }
+    clock_t end = clock();
+    double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+    printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count);
+    printf("%" PRIu64 " randoms per second\n",
+           (uint64_t)(N / time_spent) / 1000000 * 1000000); /* truncated to a multiple of 1000000 */
+}
diff --git a/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c b/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c
new file mode 100644
index 000000000..d50e63f5e
--- /dev/null
+++ b/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c
@@ -0,0 +1,83 @@
+/*
+ * Generate testing csv files
+ *
+ * cl xoroshiro128-test-data-gen.c xoroshiro128plus.orig.c \
+ * ../splitmix64/splitmix64.c /Ox
+ * xoroshiro128-test-data-gen.exe *
+ *
+ * gcc xoroshiro128-test-data-gen.c xoroshiro128plus.orig.c \
+ * ../splitmix64/splitmix64.c -o xoroshiro128-test-data-gen
+ * ./xoroshiro128-test-data-gen
+ *
+ * Requires ../splitmix64/splitmix64.c and ../splitmix64/splitmix64.h, which
+ * provide the splitmix64_next() calls used to fill the generator state.
+ * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoroshiro128plus.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() +{ + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 2; i++) + { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) + { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoroshiro128-testset-1.csv", "w"); + if (fp == NULL) + { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) + { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) + { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 2; i++) + { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) + { + store[i] = next(); + } + fp = fopen("xoroshiro128-testset-2.csv", "w"); + if (fp == NULL) + { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) + { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) + { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128.c b/numpy/random/src/xoroshiro128/xoroshiro128.c new file mode 100644 index 000000000..060eb8a51 --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128.c @@ -0,0 +1,60 @@ +/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +/* This is xoroshiro128+ 1.0, our best and fastest small-state generator + for floating-point numbers. 
We suggest to use its upper bits for + floating-point generation, as it is slightly faster than + xoroshiro128**. It passes all tests we are aware of except for the four + lower bits, which might fail linearity tests (and just those), so if + low linear complexity is not considered an issue (as it is usually the + case) it can be used to generate 64-bit outputs, too; moreover, this + generator has a very mild Hamming-weight dependency making our test + (http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe + this slight bias cannot affect any application. If you are concerned, + use xoroshiro128** or xoshiro256+. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. + + NOTE: the parameters (a=24, b=16, c=37) of this version give slightly + better results in our test than the 2016 version (a=55, b=14, c=36). 
+*/ + +#include "xoroshiro128.h" + +extern INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state); + +extern INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state); + +void xoroshiro128_jump(xoroshiro128_state *state) +{ + int i, b; + uint64_t s0; + uint64_t s1; + static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc}; + + s0 = 0; + s1 = 0; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= state->s[0]; + s1 ^= state->s[1]; + } + xoroshiro128_next(&state->s[0]); + } + + state->s[0] = s0; + state->s[1] = s1; +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128.h b/numpy/random/src/xoroshiro128/xoroshiro128.h new file mode 100644 index 000000000..0db82b173 --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128.h @@ -0,0 +1,63 @@ +#ifndef _RANDOMDGEN__XOROSHIRO128_H_ +#define _RANDOMDGEN__XOROSHIRO128_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xoroshiro128_state +{ + uint64_t s[2]; + int has_uint32; + uint32_t uinteger; +} xoroshiro128_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoroshiro128_next(uint64_t *s) +{ + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + s[1] = rotl(s1, 37); // c + + return result; +} + +static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state) +{ + return xoroshiro128_next(&state->s[0]); +} + +static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state) +{ + uint64_t next; + if (state->has_uint32) + { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoroshiro128_next(&state->s[0]); + 
state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoroshiro128_jump(xoroshiro128_state *state); + +#endif diff --git a/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c new file mode 100644 index 000000000..1b5f46e4b --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c @@ -0,0 +1,102 @@ +/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> + +/* This is xoroshiro128+ 1.0, our best and fastest small-state generator + for floating-point numbers. We suggest to use its upper bits for + floating-point generation, as it is slightly faster than + xoroshiro128**. It passes all tests we are aware of except for the four + lower bits, which might fail linearity tests (and just those), so if + low linear complexity is not considered an issue (as it is usually the + case) it can be used to generate 64-bit outputs, too; moreover, this + generator has a very mild Hamming-weight dependency making our test + (http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe + this slight bias cannot affect any application. If you are concerned, + use xoroshiro128** or xoshiro256+. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. + + NOTE: the parameters (a=24, b=16, b=37) of this version give slightly + better results in our test than the 2016 version (a=55, b=14, c=36). 
+*/ + +uint64_t s[2]; + +static inline uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +uint64_t next(void) +{ + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + s[1] = rotl(s1, 37); // c + + return result; +} + +/* This is the jump function for the generator. It is equivalent + to 2^64 calls to next(); it can be used to generate 2^64 + non-overlapping subsequences for parallel computations. */ + +void jump(void) +{ + static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc}; + + uint64_t s0 = 0; + uint64_t s1 = 0; + for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (int b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(); + } + s[0] = s0; + s[1] = s1; +} + +/* This is the long-jump function for the generator. It is equivalent to + 2^96 calls to next(); it can be used to generate 2^32 starting points, + from each of which jump() will generate 2^32 non-overlapping + subsequences for parallel distributed computations. 
*/
+
+void long_jump(void)
+{
+    static const uint64_t LONG_JUMP[] = {0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1};
+
+    uint64_t s0 = 0;
+    uint64_t s1 = 0;
+    for (int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++)
+        for (int b = 0; b < 64; b++)
+        {
+            if (LONG_JUMP[i] & UINT64_C(1) << b)
+            {
+                s0 ^= s[0];
+                s1 ^= s[1];
+            }
+            next();
+        }
+
+    s[0] = s0;
+    s[1] = s1;
+}
diff --git a/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h
new file mode 100644
index 000000000..20c96fe04
--- /dev/null
+++ b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h
@@ -0,0 +1,7 @@
+#include <stdint.h>
+
+/* Generator state. Declared here, defined once in xoroshiro128plus.orig.c. */
+extern uint64_t s[2];
+uint64_t next(void);
+void jump(void);
+void long_jump(void);
diff --git a/numpy/random/src/xorshift1024/LICENSE.md b/numpy/random/src/xorshift1024/LICENSE.md
new file mode 100644
index 000000000..3ca8ed4b9
--- /dev/null
+++ b/numpy/random/src/xorshift1024/LICENSE.md
@@ -0,0 +1,9 @@
+# XORSHIFT1024
+
+Written in 2017 by Sebastiano Vigna (vigna@acm.org)
+
+To the extent possible under law, the author has dedicated all copyright
+and related and neighboring rights to this software to the public domain
+worldwide. This software is distributed without any warranty.
+
+See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file
diff --git a/numpy/random/src/xorshift1024/xorshift1024-benchmark.c b/numpy/random/src/xorshift1024/xorshift1024-benchmark.c
new file mode 100644
index 000000000..0eef33537
--- /dev/null
+++ b/numpy/random/src/xorshift1024/xorshift1024-benchmark.c
@@ -0,0 +1,35 @@
+/*
+ * cl xorshift1024-benchmark.c xorshift1024.orig.c \
+ * ../splitmix64/splitmix64.c /Ox
+ *
+ * gcc -O3 xorshift1024-benchmark.c xorshift1024.orig.c \
+ * ../splitmix64/splitmix64.c -o xorshift1024-benchmark
+ *
+ */
+#include "../splitmix64/splitmix64.h"
+#include "xorshift1024.orig.h"
+#include <inttypes.h>
+#include <stdio.h>
+#include <time.h>
+
+#define N 1000000000 /* number of next() calls that are timed */
+
+int main() {
+  uint64_t count = 0, sum = 0;
+  uint64_t seed = 0xDEADBEAF; /* fixed seed, expanded by splitmix64 below */
+  int i;
+  for (i = 0; i < 16; i++) {
+    s[i] = splitmix64_next(&seed);
+  }
+  p = 0;
+  clock_t begin = clock();
+  for (i = 0; i < N; i++) {
+    sum += next(); /* the running sum is printed after the loop */
+    count++;
+  }
+  clock_t end = clock();
+  double time_spent = (double)(end - begin) / CLOCKS_PER_SEC;
+  printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count);
+  printf("%" PRIu64 " randoms per second\n",
+         (uint64_t)(N / time_spent) / 1000000 * 1000000); /* truncated to a multiple of 1000000 */
+}
diff --git a/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c b/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c
new file mode 100644
index 000000000..a2ae08df4
--- /dev/null
+++ b/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c
@@ -0,0 +1,74 @@
+/*
+ * Generate testing csv files
+ *
+ * cl xorshift1024-test-data-gen.c xorshift1024.orig.c \
+ * ../splitmix64/splitmix64.c /Ox
+ * xorshift1024-test-data-gen.exe *
+ *
+ * gcc xorshift1024-test-data-gen.c xorshift1024.orig.c \
+ * ../splitmix64/splitmix64.c -o xorshift1024-test-data-gen
+ * ./xorshift1024-test-data-gen
+ *
+ * Requires ../splitmix64/splitmix64.c and ../splitmix64/splitmix64.h, which
+ * provide the splitmix64_next() calls used to fill the generator state.
+ * + */ + +#include "../splitmix64/splitmix64.h" +#include "xorshift1024.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 16; i++) { + s[i] = splitmix64_next(&state); + } + p = 0; + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xorshift1024-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 16; i++) { + s[i] = splitmix64_next(&state); + } + p = 0; + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xorshift1024-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xorshift1024/xorshift1024.c b/numpy/random/src/xorshift1024/xorshift1024.c new file mode 100644 index 000000000..8737b5a82 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.c @@ -0,0 +1,32 @@ +#include "xorshift1024.h" + +/* This is the jump function for the generator. It is equivalent + to 2^512 calls to next(); it can be used to generate 2^512 + non-overlapping subsequences for parallel computations. 
*/ + +extern INLINE uint64_t xorshift1024_next(xorshift1024_state *state); +extern INLINE uint64_t xorshift1024_next64(xorshift1024_state *state); +extern INLINE uint32_t xorshift1024_next32(xorshift1024_state *state); + +void xorshift1024_jump(xorshift1024_state *state) { + int i, j, b; + static const uint64_t JUMP[] = { + 0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837, + 0x4489affce4f31a1e, 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022, + 0x3659132bb12fea70, 0xaac17d8efa43cab8, 0xc4cb815590989b13, + 0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5, + 0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca, + 0x284600e3f30e38c3}; + + uint64_t t[16] = {0}; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for (j = 0; j < 16; j++) + t[j] ^= state->s[(j + state->p) & 15]; + xorshift1024_next(state); + } + + for (j = 0; j < 16; j++) + state->s[(j + state->p) & 15] = t[j]; +} diff --git a/numpy/random/src/xorshift1024/xorshift1024.h b/numpy/random/src/xorshift1024/xorshift1024.h new file mode 100644 index 000000000..e0ef77826 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.h @@ -0,0 +1,50 @@ +#ifndef _RANDOMDGEN__XORSHIFT1024_H_ +#define _RANDOMDGEN__XORSHIFT1024_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xorshift1024_state { + uint64_t s[16]; + int p; + int has_uint32; + uint32_t uinteger; +} xorshift1024_state; + +static INLINE uint64_t xorshift1024_next(xorshift1024_state *state) { + const uint64_t s0 = state->s[state->p]; + uint64_t s1 = state->s[state->p = ((state->p) + 1) & 15]; + s1 ^= s1 << 31; // a + state->s[state->p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); // b,c + return state->s[state->p] * 0x9e3779b97f4a7c13; +} + +static INLINE uint64_t 
xorshift1024_next64(xorshift1024_state *state) { + return xorshift1024_next(state); +} + +static INLINE uint32_t xorshift1024_next32(xorshift1024_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = xorshift1024_next(state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xorshift1024_jump(xorshift1024_state *state); + +#endif diff --git a/numpy/random/src/xorshift1024/xorshift1024.orig.c b/numpy/random/src/xorshift1024/xorshift1024.orig.c new file mode 100644 index 000000000..03c1c17fe --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.orig.c @@ -0,0 +1,68 @@ +/* Written in 2017 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> +#include <string.h> + +/* NOTE: as of 2017-10-08, this generator has a different multiplier (a + fixed-point representation of the golden ratio), which eliminates + linear dependencies from one of the lowest bits. The previous + multiplier was 1181783497276652981 (M_8 in the paper). If you need to + tell apart the two generators, you can refer to this generator as + xorshift1024φ and to the previous one as xorshift1024*M_8. + + This is a fast, high-quality generator. If 1024 bits of state are too + much, try a xoroshiro128+ generator. + + Note that the two lowest bits of this generator are LFSRs of degree + 1024, and thus will fail binary rank tests. The other bits needs a much + higher degree to be represented as LFSRs. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. 
If you have
+   a 64-bit seed, we suggest to seed a splitmix64 generator and use its
+   output to fill s. */
+
+uint64_t s[16];
+int p;
+
+uint64_t next(void) {
+  const uint64_t s0 = s[p];
+  uint64_t s1 = s[p = (p + 1) & 15];
+  s1 ^= s1 << 31;                           // a
+  s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); // b,c
+  return s[p] * 0x9e3779b97f4a7c13;
+}
+
+/* This is the jump function for the generator. It is equivalent
+   to 2^512 calls to next(); it can be used to generate 2^512
+   non-overlapping subsequences for parallel computations. */
+
+void jump(void) {
+  static const uint64_t JUMP[] = {
+      0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837,
+      0x4489affce4f31a1e, 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022,
+      0x3659132bb12fea70, 0xaac17d8efa43cab8, 0xc4cb815590989b13,
+      0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5,
+      0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca,
+      0x284600e3f30e38c3};
+
+  uint64_t t[16] = {0};
+  for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++)
+    for (int b = 0; b < 64; b++) {
+      if (JUMP[i] & UINT64_C(1) << b)
+        for (int j = 0; j < 16; j++)
+          t[j] ^= s[(j + p) & 15];
+      next();
+    }
+
+  for (int j = 0; j < 16; j++)
+    s[(j + p) & 15] = t[j];
+}
diff --git a/numpy/random/src/xorshift1024/xorshift1024.orig.h b/numpy/random/src/xorshift1024/xorshift1024.orig.h
new file mode 100644
index 000000000..9b7597967
--- /dev/null
+++ b/numpy/random/src/xorshift1024/xorshift1024.orig.h
@@ -0,0 +1,8 @@
+#include <stdint.h>
+#include <string.h>
+
+/* Generator state. Declared here, defined once in xorshift1024.orig.c. */
+extern uint64_t s[16];
+extern int p;
+uint64_t next(void);
+void jump(void);
diff --git a/numpy/random/src/xoshiro256starstar/LICENSE.md b/numpy/random/src/xoshiro256starstar/LICENSE.md
new file mode 100644
index 000000000..d863f3b29
--- /dev/null
+++ b/numpy/random/src/xoshiro256starstar/LICENSE.md
@@ -0,0 +1,9 @@
+# XOSHIRO256STARSTAR
+
+Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org)
+
+To the extent possible under law, the author has dedicated all copyright
+and related and neighboring
rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c new file mode 100644 index 000000000..8522229dd --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c @@ -0,0 +1,72 @@ +/* + * Generate testing csv files + * + * cl xoshiro256starstar-test-data-gen.c xoshiro256starstar.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xoshiro256starstar-test-data-gen.exe * + * + * gcc xoshiro256starstar-test-data-gen.c xoshiro256starstar.orig.c / + * ../splitmix64/splitmix64.c -o xoshiro256starstar-test-data-gen + * ./xoshiro256starstar-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoshiro256starstar.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 4; i++) { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoshiro256starstar-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 4; i++) { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xoshiro256starstar-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" 
PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c new file mode 100644 index 000000000..30b6c7d85 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c @@ -0,0 +1,55 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include "xoshiro256starstar.h" + +/* This is xoshiro256** 1.0, our all-purpose, rock-solid generator. It has + excellent (sub-ns) speed, a state (256 bits) that is large enough for + any parallel application, and it passes all tests we are aware of. + + For generating just floating-point numbers, xoshiro256+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +extern INLINE uint64_t xoshiro256starstar_next64(xoshiro256starstar_state *state); + +extern INLINE uint32_t xoshiro256starstar_next32(xoshiro256starstar_state *state); + +/* This is the jump function for the generator. It is equivalent + to 2^128 calls to next(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. 
*/ + +void xoshiro256starstar_jump(xoshiro256starstar_state *state) +{ + int i, b; + static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c}; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= state->s[0]; + s1 ^= state->s[1]; + s2 ^= state->s[2]; + s3 ^= state->s[3]; + } + xoshiro256starstar_next(&state->s[0]); + } + + state->s[0] = s0; + state->s[1] = s1; + state->s[2] = s2; + state->s[3] = s3; +} diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h new file mode 100644 index 000000000..1d7d8ea40 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h @@ -0,0 +1,63 @@ +#ifndef _RANDOMDGEN__XOSHIRO256STARSTAR_H_ +#define _RANDOMDGEN__XOSHIRO256STARSTAR_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xoshiro256starstar_state { + uint64_t s[4]; + int has_uint32; + uint32_t uinteger; +} xoshiro256starstar_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoshiro256starstar_next(uint64_t *s) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + const uint64_t t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + + s[3] = rotl(s[3], 45); + + return result_starstar; +} + +static INLINE uint64_t +xoshiro256starstar_next64(xoshiro256starstar_state *state) { + return xoshiro256starstar_next(&state->s[0]); +} + +static INLINE uint32_t +xoshiro256starstar_next32(xoshiro256starstar_state *state) { + uint64_t next; + if 
(state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoshiro256starstar_next(&state->s[0]); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoshiro256starstar_jump(xoshiro256starstar_state *state); + +#endif diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c new file mode 100644 index 000000000..ecf87bab9 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c @@ -0,0 +1,103 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> + +/* This is xoshiro256** 1.0, our all-purpose, rock-solid generator. It has + excellent (sub-ns) speed, a state (256 bits) that is large enough for + any parallel application, and it passes all tests we are aware of. + + For generating just floating-point numbers, xoshiro256+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +uint64_t s[4]; + +uint64_t next(void) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + + s[3] = rotl(s[3], 45); + + return result_starstar; +} + + +/* This is the jump function for the generator. 
It is equivalent + to 2^128 calls to next(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +} + + + +/* This is the long-jump function for the generator. It is equivalent to + 2^192 calls to next(); it can be used to generate 2^64 starting points, + from each of which jump() will generate 2^64 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635 }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +}
\ No newline at end of file diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h new file mode 100644 index 000000000..3aa788ec9 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h @@ -0,0 +1,5 @@ +#include <stdint.h> + +uint64_t s[4]; +uint64_t next(void); +void jump(void); diff --git a/numpy/random/src/xoshiro512starstar/LICENSE.md b/numpy/random/src/xoshiro512starstar/LICENSE.md new file mode 100644 index 000000000..aa34c1966 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/LICENSE.md @@ -0,0 +1,9 @@ +# XOSHIRO512STARSTAR + +Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c new file mode 100644 index 000000000..bcc3574e4 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c @@ -0,0 +1,72 @@ +/* + * Generate testing csv files + * + * cl xoshiro512starstar-test-data-gen.c xoshiro512starstar.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xoshiro512starstar-test-data-gen.exe * + * + * gcc xoshiro512starstar-test-data-gen.c xoshiro512starstar.orig.c / + * ../splitmix64/splitmix64.c -o xoshiro512starstar-test-data-gen + * ./xoshiro512starstar-test-data-gen + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). 
+ * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoshiro512starstar.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 8; i++) { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoshiro512starstar-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 8; i++) { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xoshiro512starstar-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c new file mode 100644 index 000000000..a9f56699f --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c @@ -0,0 +1,53 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include "xoshiro512starstar.h" + +/* This is xoshiro512** 1.0, an all-purpose, rock-solid generator. 
It has + excellent (about 1ns) speed, an increased state (512 bits) that is + large enough for any parallel application, and it passes all tests we + are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +extern INLINE uint64_t +xoshiro512starstar_next64(xoshiro512starstar_state *state); + +extern INLINE uint32_t +xoshiro512starstar_next32(xoshiro512starstar_state *state); + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +static uint64_t s_placeholder[8]; + +void xoshiro512starstar_jump(xoshiro512starstar_state *state) { + + int i, b, w; + static const uint64_t JUMP[] = {0x33ed89b6e7a353f9, 0x760083d7955323be, + 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, + 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, + 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db}; + + uint64_t t[sizeof s_placeholder / sizeof *s_placeholder]; + memset(t, 0, sizeof t); + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for (w = 0; w < sizeof s_placeholder / sizeof *s_placeholder; w++) + t[w] ^= state->s[w]; + xoshiro512starstar_next(&state->s[0]); + } + + memcpy(state->s, t, sizeof s_placeholder); +} diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h new file mode 100644 index 000000000..0fa0ba3cd --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h @@ -0,0 +1,75 @@ +#ifndef _RANDOMDGEN__XOSHIRO512STARSTAR_H_ +#define _RANDOMDGEN__XOSHIRO512STARSTAR_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE 
__inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif +#include <string.h> + +typedef struct s_xoshiro512starstar_state +{ + uint64_t s[8]; + int has_uint32; + uint32_t uinteger; +} xoshiro512starstar_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoshiro512starstar_next(uint64_t *s) +{ + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result_starstar; +} + +static INLINE uint64_t +xoshiro512starstar_next64(xoshiro512starstar_state *state) +{ + return xoshiro512starstar_next(&state->s[0]); +} + +static INLINE uint32_t +xoshiro512starstar_next32(xoshiro512starstar_state *state) +{ + uint64_t next; + if (state->has_uint32) + { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoshiro512starstar_next(&state->s[0]); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoshiro512starstar_jump(xoshiro512starstar_state *state); + +#endif diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c new file mode 100644 index 000000000..0cf884edb --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c @@ -0,0 +1,67 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. 
*/ + +#include "xoshiro512starstar.orig.h" + +/* This is xoshiro512** 1.0, an all-purpose, rock-solid generator. It has + excellent (about 1ns) speed, an increased state (512 bits) that is + large enough for any parallel application, and it passes all tests we + are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +uint64_t next(void) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result_starstar; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for(int w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +}
\ No newline at end of file diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h new file mode 100644 index 000000000..0b7892473 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h @@ -0,0 +1,6 @@ +#include <stdint.h> +#include <string.h> + +uint64_t s[8]; +uint64_t next(void); +void jump(void); |
