diff options
| author | mattip <matti.picus@gmail.com> | 2019-04-10 00:50:42 +0300 |
|---|---|---|
| committer | mattip <matti.picus@gmail.com> | 2019-05-20 18:45:27 +0300 |
| commit | c53b2eb729bae1f248a2654dfcfa4a3dd3e2902b (patch) | |
| tree | eed1d982201dc892984feaca355565217069eb20 /numpy/random/src | |
| parent | 7e8e19f9a3b452fdbd992568348b393c31fba005 (diff) | |
| download | numpy-c53b2eb729bae1f248a2654dfcfa4a3dd3e2902b.tar.gz | |
BENCH: convert bencmarks to asv format
remove files that were part of the origal repo
rework randomgen docs to integrate with numpy and fix some links
remove convenience functions, require explicit call to gen.brng
move code out of numpy.random.randomgen into numpy.random
Diffstat (limited to 'numpy/random/src')
100 files changed, 16834 insertions, 0 deletions
diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.c b/numpy/random/src/aligned_malloc/aligned_malloc.c new file mode 100644 index 000000000..6e8192cfb --- /dev/null +++ b/numpy/random/src/aligned_malloc/aligned_malloc.c @@ -0,0 +1,9 @@ +#include "aligned_malloc.h" + +static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n); + +static NPY_INLINE void *PyArray_malloc_aligned(size_t n); + +static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s); + +static NPY_INLINE void PyArray_free_aligned(void *p);
\ No newline at end of file diff --git a/numpy/random/src/aligned_malloc/aligned_malloc.h b/numpy/random/src/aligned_malloc/aligned_malloc.h new file mode 100644 index 000000000..ea24f6d23 --- /dev/null +++ b/numpy/random/src/aligned_malloc/aligned_malloc.h @@ -0,0 +1,54 @@ +#ifndef _RANDOMDGEN__ALIGNED_MALLOC_H_ +#define _RANDOMDGEN__ALIGNED_MALLOC_H_ + +#include "Python.h" +#include "numpy/npy_common.h" + +#define NPY_MEMALIGN 16 /* 16 for SSE2, 32 for AVX, 64 for Xeon Phi */ + +static NPY_INLINE void *PyArray_realloc_aligned(void *p, size_t n) +{ + void *p1, **p2, *base; + size_t old_offs, offs = NPY_MEMALIGN - 1 + sizeof(void *); + if (NPY_UNLIKELY(p != NULL)) + { + base = *(((void **)p) - 1); + if (NPY_UNLIKELY((p1 = PyMem_Realloc(base, n + offs)) == NULL)) + return NULL; + if (NPY_LIKELY(p1 == base)) + return p; + p2 = (void **)(((Py_uintptr_t)(p1) + offs) & ~(NPY_MEMALIGN - 1)); + old_offs = (size_t)((Py_uintptr_t)p - (Py_uintptr_t)base); + memmove((void *)p2, ((char *)p1) + old_offs, n); + } + else + { + if (NPY_UNLIKELY((p1 = PyMem_Malloc(n + offs)) == NULL)) + return NULL; + p2 = (void **)(((Py_uintptr_t)(p1) + offs) & ~(NPY_MEMALIGN - 1)); + } + *(p2 - 1) = p1; + return (void *)p2; +} + +static NPY_INLINE void *PyArray_malloc_aligned(size_t n) +{ + return PyArray_realloc_aligned(NULL, n); +} + +static NPY_INLINE void *PyArray_calloc_aligned(size_t n, size_t s) +{ + void *p; + if (NPY_UNLIKELY((p = PyArray_realloc_aligned(NULL, n * s)) == NULL)) + return NULL; + memset(p, 0, n * s); + return p; +} + +static NPY_INLINE void PyArray_free_aligned(void *p) +{ + void *base = *(((void **)p) - 1); + PyMem_Free(base); +} + +#endif diff --git a/numpy/random/src/common/LICENSE.md b/numpy/random/src/common/LICENSE.md new file mode 100644 index 000000000..71bf8cf46 --- /dev/null +++ b/numpy/random/src/common/LICENSE.md @@ -0,0 +1,29 @@ +ISO C9x compliant stdint.h for Microsoft Visual Studio +Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 + +Copyright (c) 2006-2013 Alexander Chemeris + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are met: + +1. Redistributions of source code must retain the above copyright notice, + this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +3. Neither the name of the product nor the names of its contributors may + be used to endorse or promote products derived from this software + without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
\ No newline at end of file diff --git a/numpy/random/src/common/inttypes.h b/numpy/random/src/common/inttypes.h new file mode 100644 index 000000000..8f8b61108 --- /dev/null +++ b/numpy/random/src/common/inttypes.h @@ -0,0 +1,306 @@ +// ISO C9x compliant inttypes.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_INTTYPES_H_ // [ +#define _MSC_INTTYPES_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#include "stdint.h" + +// 7.8 Format conversion of integer types + +typedef struct { + intmax_t quot; + intmax_t rem; +} imaxdiv_t; + +// 7.8.1 Macros for format specifiers + +#if !defined(__cplusplus) || defined(__STDC_FORMAT_MACROS) // [ See footnote 185 at page 198 + +// The fprintf macros for signed integers are: +#define PRId8 "d" +#define PRIi8 "i" +#define PRIdLEAST8 "d" +#define PRIiLEAST8 "i" +#define PRIdFAST8 "d" +#define PRIiFAST8 "i" + +#define PRId16 "hd" +#define PRIi16 "hi" +#define PRIdLEAST16 "hd" +#define PRIiLEAST16 "hi" +#define PRIdFAST16 "hd" +#define PRIiFAST16 "hi" + +#define PRId32 "I32d" +#define PRIi32 "I32i" +#define PRIdLEAST32 "I32d" +#define PRIiLEAST32 "I32i" +#define PRIdFAST32 "I32d" +#define PRIiFAST32 "I32i" + +#define PRId64 "I64d" +#define PRIi64 "I64i" +#define PRIdLEAST64 "I64d" +#define PRIiLEAST64 "I64i" +#define PRIdFAST64 "I64d" +#define PRIiFAST64 "I64i" + +#define PRIdMAX "I64d" +#define PRIiMAX "I64i" + +#define PRIdPTR "Id" +#define PRIiPTR "Ii" + +// The fprintf macros for unsigned integers are: +#define PRIo8 "o" +#define PRIu8 "u" +#define PRIx8 "x" +#define PRIX8 "X" +#define PRIoLEAST8 "o" +#define PRIuLEAST8 "u" +#define PRIxLEAST8 "x" +#define PRIXLEAST8 "X" +#define PRIoFAST8 "o" +#define PRIuFAST8 "u" +#define PRIxFAST8 "x" +#define PRIXFAST8 "X" + +#define PRIo16 "ho" +#define PRIu16 "hu" +#define PRIx16 "hx" +#define PRIX16 "hX" +#define PRIoLEAST16 "ho" +#define PRIuLEAST16 "hu" +#define PRIxLEAST16 "hx" +#define PRIXLEAST16 "hX" +#define PRIoFAST16 "ho" +#define PRIuFAST16 "hu" +#define PRIxFAST16 "hx" +#define PRIXFAST16 "hX" + +#define PRIo32 "I32o" +#define PRIu32 "I32u" +#define PRIx32 "I32x" +#define PRIX32 "I32X" +#define PRIoLEAST32 "I32o" +#define PRIuLEAST32 "I32u" +#define PRIxLEAST32 "I32x" +#define PRIXLEAST32 "I32X" +#define PRIoFAST32 "I32o" +#define PRIuFAST32 "I32u" +#define PRIxFAST32 "I32x" +#define PRIXFAST32 "I32X" + +#define PRIo64 "I64o" +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#define PRIX64 "I64X" +#define PRIoLEAST64 "I64o" +#define PRIuLEAST64 "I64u" +#define PRIxLEAST64 "I64x" +#define PRIXLEAST64 "I64X" +#define PRIoFAST64 "I64o" +#define PRIuFAST64 "I64u" +#define PRIxFAST64 "I64x" +#define PRIXFAST64 "I64X" + +#define PRIoMAX "I64o" +#define PRIuMAX "I64u" +#define PRIxMAX "I64x" +#define PRIXMAX "I64X" + +#define PRIoPTR "Io" +#define PRIuPTR "Iu" +#define PRIxPTR "Ix" +#define PRIXPTR "IX" + +// The fscanf macros for signed integers are: +#define SCNd8 "d" +#define SCNi8 "i" +#define SCNdLEAST8 "d" +#define SCNiLEAST8 "i" +#define SCNdFAST8 "d" +#define SCNiFAST8 "i" + +#define SCNd16 "hd" +#define SCNi16 "hi" +#define SCNdLEAST16 "hd" +#define SCNiLEAST16 "hi" +#define SCNdFAST16 "hd" +#define SCNiFAST16 "hi" + +#define SCNd32 "ld" +#define SCNi32 "li" +#define SCNdLEAST32 "ld" +#define SCNiLEAST32 "li" +#define SCNdFAST32 "ld" +#define SCNiFAST32 "li" + +#define SCNd64 "I64d" +#define SCNi64 "I64i" +#define SCNdLEAST64 "I64d" +#define SCNiLEAST64 "I64i" +#define SCNdFAST64 "I64d" +#define SCNiFAST64 "I64i" + +#define SCNdMAX "I64d" +#define SCNiMAX "I64i" + +#ifdef _WIN64 // [ +# define SCNdPTR "I64d" +# define SCNiPTR "I64i" +#else // _WIN64 ][ +# define SCNdPTR "ld" +# define SCNiPTR "li" +#endif // _WIN64 ] + +// The fscanf macros for unsigned integers are: +#define SCNo8 "o" +#define SCNu8 "u" +#define SCNx8 "x" +#define SCNX8 "X" +#define SCNoLEAST8 "o" +#define SCNuLEAST8 "u" +#define SCNxLEAST8 "x" +#define SCNXLEAST8 "X" +#define SCNoFAST8 "o" +#define SCNuFAST8 "u" +#define SCNxFAST8 "x" +#define SCNXFAST8 "X" + +#define SCNo16 "ho" +#define SCNu16 "hu" +#define SCNx16 "hx" +#define SCNX16 "hX" +#define SCNoLEAST16 "ho" +#define SCNuLEAST16 "hu" +#define SCNxLEAST16 "hx" +#define SCNXLEAST16 "hX" +#define SCNoFAST16 "ho" +#define SCNuFAST16 "hu" +#define SCNxFAST16 "hx" +#define SCNXFAST16 "hX" + +#define SCNo32 "lo" +#define SCNu32 "lu" +#define SCNx32 "lx" +#define SCNX32 "lX" +#define SCNoLEAST32 "lo" +#define SCNuLEAST32 "lu" +#define SCNxLEAST32 "lx" +#define SCNXLEAST32 "lX" +#define SCNoFAST32 "lo" +#define SCNuFAST32 "lu" +#define SCNxFAST32 "lx" +#define SCNXFAST32 "lX" + +#define SCNo64 "I64o" +#define SCNu64 "I64u" +#define SCNx64 "I64x" +#define SCNX64 "I64X" +#define SCNoLEAST64 "I64o" +#define SCNuLEAST64 "I64u" +#define SCNxLEAST64 "I64x" +#define SCNXLEAST64 "I64X" +#define SCNoFAST64 "I64o" +#define SCNuFAST64 "I64u" +#define SCNxFAST64 "I64x" +#define SCNXFAST64 "I64X" + +#define SCNoMAX "I64o" +#define SCNuMAX "I64u" +#define SCNxMAX "I64x" +#define SCNXMAX "I64X" + +#ifdef _WIN64 // [ +# define SCNoPTR "I64o" +# define SCNuPTR "I64u" +# define SCNxPTR "I64x" +# define SCNXPTR "I64X" +#else // _WIN64 ][ +# define SCNoPTR "lo" +# define SCNuPTR "lu" +# define SCNxPTR "lx" +# define SCNXPTR "lX" +#endif // _WIN64 ] + +#endif // __STDC_FORMAT_MACROS ] + +// 7.8.2 Functions for greatest-width integer types + +// 7.8.2.1 The imaxabs function +#define imaxabs _abs64 + +// 7.8.2.2 The imaxdiv function + +// This is modified version of div() function from Microsoft's div.c found +// in %MSVC.NET%\crt\src\div.c +#ifdef STATIC_IMAXDIV // [ +static +#else // STATIC_IMAXDIV ][ +_inline +#endif // STATIC_IMAXDIV ] +imaxdiv_t __cdecl imaxdiv(intmax_t numer, intmax_t denom) +{ + imaxdiv_t result; + + result.quot = numer / denom; + result.rem = numer % denom; + + if (numer < 0 && result.rem > 0) { + // did division wrong; must fix up + ++result.quot; + result.rem -= denom; + } + + return result; +} + +// 7.8.2.3 The strtoimax and strtoumax functions +#define strtoimax _strtoi64 +#define strtoumax _strtoui64 + +// 7.8.2.4 The wcstoimax and wcstoumax functions +#define wcstoimax _wcstoi64 +#define wcstoumax _wcstoui64 + + +#endif // _MSC_INTTYPES_H_ ]
\ No newline at end of file diff --git a/numpy/random/src/common/stdint.h b/numpy/random/src/common/stdint.h new file mode 100644 index 000000000..710de1570 --- /dev/null +++ b/numpy/random/src/common/stdint.h @@ -0,0 +1,258 @@ +// ISO C9x compliant stdint.h for Microsoft Visual Studio +// Based on ISO/IEC 9899:TC2 Committee draft (May 6, 2005) WG14/N1124 +// +// Copyright (c) 2006-2013 Alexander Chemeris +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions are met: +// +// 1. Redistributions of source code must retain the above copyright notice, +// this list of conditions and the following disclaimer. +// +// 2. Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// +// 3. Neither the name of the product nor the names of its contributors may +// be used to endorse or promote products derived from this software +// without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR IMPLIED +// WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF +// MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO +// EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; +// OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, +// WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR +// OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF +// ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +// +/////////////////////////////////////////////////////////////////////////////// + +#ifndef _MSC_VER // [ +#error "Use this header only with Microsoft Visual C++ compilers!" +#endif // _MSC_VER ] + +#ifndef _MSC_STDINT_H_ // [ +#define _MSC_STDINT_H_ + +#if _MSC_VER > 1000 +#pragma once +#endif + +#if _MSC_VER >= 1600 // [ +#include <stdint.h> +#else // ] _MSC_VER >= 1600 [ + +#include <limits.h> + +// For Visual Studio 6 in C++ mode and for many Visual Studio versions when +// compiling for ARM we should wrap <wchar.h> include with 'extern "C++" {}' +// or compiler give many errors like this: +// error C2733: second C linkage of overloaded function 'wmemchr' not allowed +#ifdef __cplusplus +extern "C" { +#endif +#include <wchar.h> +#ifdef __cplusplus +} +#endif + +// Define _W64 macros to mark types changing their size, like intptr_t. +#ifndef _W64 +#if !defined(__midl) && (defined(_X86_) || defined(_M_IX86)) && _MSC_VER >= 1300 +#define _W64 __w64 +#else +#define _W64 +#endif +#endif + +// 7.18.1 Integer types + +// 7.18.1.1 Exact-width integer types + +// Visual Studio 6 and Embedded Visual C++ 4 doesn't +// realize that, e.g. char has the same size as __int8 +// so we give up on __intX for them. +#if (_MSC_VER < 1300) +typedef signed char int8_t; +typedef signed short int16_t; +typedef signed int int32_t; +typedef unsigned char uint8_t; +typedef unsigned short uint16_t; +typedef unsigned int uint32_t; +#else +typedef signed __int8 int8_t; +typedef signed __int16 int16_t; +typedef signed __int32 int32_t; +typedef unsigned __int8 uint8_t; +typedef unsigned __int16 uint16_t; +typedef unsigned __int32 uint32_t; +#endif +typedef signed __int64 int64_t; +typedef unsigned __int64 uint64_t; + +// 7.18.1.2 Minimum-width integer types +typedef int8_t int_least8_t; +typedef int16_t int_least16_t; +typedef int32_t int_least32_t; +typedef int64_t int_least64_t; +typedef uint8_t uint_least8_t; +typedef uint16_t uint_least16_t; +typedef uint32_t uint_least32_t; +typedef uint64_t uint_least64_t; + +// 7.18.1.3 Fastest minimum-width integer types +typedef int8_t int_fast8_t; +typedef int16_t int_fast16_t; +typedef int32_t int_fast32_t; +typedef int64_t int_fast64_t; +typedef uint8_t uint_fast8_t; +typedef uint16_t uint_fast16_t; +typedef uint32_t uint_fast32_t; +typedef uint64_t uint_fast64_t; + +// 7.18.1.4 Integer types capable of holding object pointers +#ifdef _WIN64 // [ +typedef signed __int64 intptr_t; +typedef unsigned __int64 uintptr_t; +#else // _WIN64 ][ +typedef _W64 signed int intptr_t; +typedef _W64 unsigned int uintptr_t; +#endif // _WIN64 ] + +// 7.18.1.5 Greatest-width integer types +typedef int64_t intmax_t; +typedef uint64_t uintmax_t; + +// 7.18.2 Limits of specified-width integer types + +#if !defined(__cplusplus) || \ + defined(__STDC_LIMIT_MACROS) // [ See footnote 220 at page 257 and + // footnote 221 at page 259 + +// 7.18.2.1 Limits of exact-width integer types +#define INT8_MIN ((int8_t)_I8_MIN) +#define INT8_MAX _I8_MAX +#define INT16_MIN ((int16_t)_I16_MIN) +#define INT16_MAX _I16_MAX +#define INT32_MIN ((int32_t)_I32_MIN) +#define INT32_MAX _I32_MAX +#define INT64_MIN ((int64_t)_I64_MIN) +#define INT64_MAX _I64_MAX +#define UINT8_MAX _UI8_MAX +#define UINT16_MAX _UI16_MAX +#define UINT32_MAX _UI32_MAX +#define UINT64_MAX _UI64_MAX + +// 7.18.2.2 Limits of minimum-width integer types +#define INT_LEAST8_MIN INT8_MIN +#define INT_LEAST8_MAX INT8_MAX +#define INT_LEAST16_MIN INT16_MIN +#define INT_LEAST16_MAX INT16_MAX +#define INT_LEAST32_MIN INT32_MIN +#define INT_LEAST32_MAX INT32_MAX +#define INT_LEAST64_MIN INT64_MIN +#define INT_LEAST64_MAX INT64_MAX +#define UINT_LEAST8_MAX UINT8_MAX +#define UINT_LEAST16_MAX UINT16_MAX +#define UINT_LEAST32_MAX UINT32_MAX +#define UINT_LEAST64_MAX UINT64_MAX + +// 7.18.2.3 Limits of fastest minimum-width integer types +#define INT_FAST8_MIN INT8_MIN +#define INT_FAST8_MAX INT8_MAX +#define INT_FAST16_MIN INT16_MIN +#define INT_FAST16_MAX INT16_MAX +#define INT_FAST32_MIN INT32_MIN +#define INT_FAST32_MAX INT32_MAX +#define INT_FAST64_MIN INT64_MIN +#define INT_FAST64_MAX INT64_MAX +#define UINT_FAST8_MAX UINT8_MAX +#define UINT_FAST16_MAX UINT16_MAX +#define UINT_FAST32_MAX UINT32_MAX +#define UINT_FAST64_MAX UINT64_MAX + +// 7.18.2.4 Limits of integer types capable of holding object pointers +#ifdef _WIN64 // [ +#define INTPTR_MIN INT64_MIN +#define INTPTR_MAX INT64_MAX +#define UINTPTR_MAX UINT64_MAX +#else // _WIN64 ][ +#define INTPTR_MIN INT32_MIN +#define INTPTR_MAX INT32_MAX +#define UINTPTR_MAX UINT32_MAX +#endif // _WIN64 ] + +// 7.18.2.5 Limits of greatest-width integer types +#define INTMAX_MIN INT64_MIN +#define INTMAX_MAX INT64_MAX +#define UINTMAX_MAX UINT64_MAX + +// 7.18.3 Limits of other integer types + +#ifdef _WIN64 // [ +#define PTRDIFF_MIN _I64_MIN +#define PTRDIFF_MAX _I64_MAX +#else // _WIN64 ][ +#define PTRDIFF_MIN _I32_MIN +#define PTRDIFF_MAX _I32_MAX +#endif // _WIN64 ] + +#define SIG_ATOMIC_MIN INT_MIN +#define SIG_ATOMIC_MAX INT_MAX + +#ifndef SIZE_MAX // [ +#ifdef _WIN64 // [ +#define SIZE_MAX _UI64_MAX +#else // _WIN64 ][ +#define SIZE_MAX _UI32_MAX +#endif // _WIN64 ] +#endif // SIZE_MAX ] + +// WCHAR_MIN and WCHAR_MAX are also defined in <wchar.h> +#ifndef WCHAR_MIN // [ +#define WCHAR_MIN 0 +#endif // WCHAR_MIN ] +#ifndef WCHAR_MAX // [ +#define WCHAR_MAX _UI16_MAX +#endif // WCHAR_MAX ] + +#define WINT_MIN 0 +#define WINT_MAX _UI16_MAX + +#endif // __STDC_LIMIT_MACROS ] + +// 7.18.4 Limits of other integer types + +#if !defined(__cplusplus) || \ + defined(__STDC_CONSTANT_MACROS) // [ See footnote 224 at page 260 + +// 7.18.4.1 Macros for minimum-width integer constants + +#define INT8_C(val) val##i8 +#define INT16_C(val) val##i16 +#define INT32_C(val) val##i32 +#define INT64_C(val) val##i64 + +#define UINT8_C(val) val##ui8 +#define UINT16_C(val) val##ui16 +#define UINT32_C(val) val##ui32 +#define UINT64_C(val) val##ui64 + +// 7.18.4.2 Macros for greatest-width integer constants +// These #ifndef's are needed to prevent collisions with <boost/cstdint.hpp>. +// Check out Issue 9 for the details. +#ifndef INTMAX_C // [ +#define INTMAX_C INT64_C +#endif // INTMAX_C ] +#ifndef UINTMAX_C // [ +#define UINTMAX_C UINT64_C +#endif // UINTMAX_C ] + +#endif // __STDC_CONSTANT_MACROS ] + +#endif // _MSC_VER >= 1600 ] + +#endif // _MSC_STDINT_H_ ]
\ No newline at end of file diff --git a/numpy/random/src/distributions/LICENSE.md b/numpy/random/src/distributions/LICENSE.md new file mode 100644 index 000000000..31576ba4b --- /dev/null +++ b/numpy/random/src/distributions/LICENSE.md @@ -0,0 +1,61 @@ +## NumPy + +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + + +## Julia + +The ziggurat methods were derived from Julia. + +Copyright (c) 2009-2019: Jeff Bezanson, Stefan Karpinski, Viral B. Shah, +and other contributors: + +https://github.com/JuliaLang/julia/contributors + +Permission is hereby granted, free of charge, to any person obtaining +a copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be +included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, +EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND +NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE +LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION +OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION +WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/numpy/random/src/distributions/binomial.h b/numpy/random/src/distributions/binomial.h new file mode 100644 index 000000000..e69de29bb --- /dev/null +++ b/numpy/random/src/distributions/binomial.h diff --git a/numpy/random/src/distributions/distributions.c b/numpy/random/src/distributions/distributions.c new file mode 100644 index 000000000..83806de38 --- /dev/null +++ b/numpy/random/src/distributions/distributions.c @@ -0,0 +1,1811 @@ +#include "distributions.h" +#include "ziggurat.h" +#include "ziggurat_constants.h" + +#if defined(_MSC_VER) && defined(_WIN64) +#include <intrin.h> +#endif + +/* Random generators for external use */ +float random_float(brng_t *brng_state) { + return next_float(brng_state); +} + +double random_double(brng_t *brng_state) { + return next_double(brng_state); +} + +static NPY_INLINE double next_standard_exponential(brng_t *brng_state) { + return -log(1.0 - next_double(brng_state)); +} + +double random_standard_exponential(brng_t *brng_state) { + return next_standard_exponential(brng_state); +} + +void random_standard_exponential_fill(brng_t *brng_state, npy_intp cnt, + double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_standard_exponential(brng_state); + } +} + +float random_standard_exponential_f(brng_t *brng_state) { + return -logf(1.0f - next_float(brng_state)); +} + +void random_double_fill(brng_t *brng_state, npy_intp cnt, double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_double(brng_state); + } +} +#if 0 +double random_gauss(brng_t *brng_state) { + if (brng_state->has_gauss) { + const double temp = brng_state->gauss; + brng_state->has_gauss = false; + brng_state->gauss = 0.0; + return temp; + } else { + double f, x1, x2, r2; + + do { + x1 = 2.0 * next_double(brng_state) - 1.0; + x2 = 2.0 * next_double(brng_state) - 1.0; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. */ + f = sqrt(-2.0 * log(r2) / r2); + /* Keep for next call */ + brng_state->gauss = f * x1; + brng_state->has_gauss = true; + return f * x2; + } +} + +float random_gauss_f(brng_t *brng_state) { + if (brng_state->has_gauss_f) { + const float temp = brng_state->gauss_f; + brng_state->has_gauss_f = false; + brng_state->gauss_f = 0.0f; + return temp; + } else { + float f, x1, x2, r2; + + do { + x1 = 2.0f * next_float(brng_state) - 1.0f; + x2 = 2.0f * next_float(brng_state) - 1.0f; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. */ + f = sqrtf(-2.0f * logf(r2) / r2); + /* Keep for next call */ + brng_state->gauss_f = f * x1; + brng_state->has_gauss_f = true; + return f * x2; + } +} +#endif + +static NPY_INLINE double standard_exponential_zig(brng_t *brng_state); + +static double standard_exponential_zig_unlikely(brng_t *brng_state, uint8_t idx, + double x) { + if (idx == 0) { + return ziggurat_exp_r - log(next_double(brng_state)); + } else if ((fe_double[idx - 1] - fe_double[idx]) * next_double(brng_state) + + fe_double[idx] < + exp(-x)) { + return x; + } else { + return standard_exponential_zig(brng_state); + } +} + +static NPY_INLINE double standard_exponential_zig(brng_t *brng_state) { + uint64_t ri; + uint8_t idx; + double x; + ri = next_uint64(brng_state); + ri >>= 3; + idx = ri & 0xFF; + ri >>= 8; + x = ri * we_double[idx]; + if (ri < ke_double[idx]) { + return x; /* 98.9% of the time we return here 1st try */ + } + return standard_exponential_zig_unlikely(brng_state, idx, x); +} + +double random_standard_exponential_zig(brng_t *brng_state) { + return standard_exponential_zig(brng_state); +} + +void random_standard_exponential_zig_fill(brng_t *brng_state, npy_intp cnt, + double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = standard_exponential_zig(brng_state); + } +} + +static NPY_INLINE float standard_exponential_zig_f(brng_t *brng_state); + +static float standard_exponential_zig_unlikely_f(brng_t *brng_state, + uint8_t idx, float x) { + if (idx == 0) { + return ziggurat_exp_r_f - logf(next_float(brng_state)); + } else if ((fe_float[idx - 1] - fe_float[idx]) * next_float(brng_state) + + fe_float[idx] < + expf(-x)) { + return x; + } else { + return standard_exponential_zig_f(brng_state); + } +} + +static NPY_INLINE float standard_exponential_zig_f(brng_t *brng_state) { + uint32_t ri; + uint8_t idx; + float x; + ri = next_uint32(brng_state); + ri >>= 1; + idx = ri & 0xFF; + ri >>= 8; + x = ri * we_float[idx]; + if (ri < ke_float[idx]) { + return x; /* 98.9% of the time we return here 1st try */ + } + return standard_exponential_zig_unlikely_f(brng_state, idx, x); +} + +float random_standard_exponential_zig_f(brng_t *brng_state) { + return standard_exponential_zig_f(brng_state); +} + +static NPY_INLINE double next_gauss_zig(brng_t *brng_state) { + uint64_t r; + int sign; + int64_t rabs; + int idx; + double x, xx, yy; + for (;;) { + /* r = e3n52sb8 */ + r = next_uint64(brng_state); + idx = r & 0xff; + r >>= 8; + sign = r & 0x1; + rabs = (int64_t)((r >> 1) & 0x000fffffffffffff); + x = rabs * wi_double[idx]; + if (sign & 0x1) + x = -x; + if (rabs < ki_double[idx]) + return x; /* 99.3% of the time return here */ + if (idx == 0) { + for (;;) { + xx = -ziggurat_nor_inv_r * log(next_double(brng_state)); + yy = -log(next_double(brng_state)); + if (yy + yy > xx * xx) + return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r + xx) + : ziggurat_nor_r + xx; + } + } else { + if (((fi_double[idx - 1] - fi_double[idx]) * next_double(brng_state) + + fi_double[idx]) < exp(-0.5 * x * x)) + return x; + } + } +} + +double random_gauss_zig(brng_t *brng_state) { + return next_gauss_zig(brng_state); +} + +void random_gauss_zig_fill(brng_t *brng_state, npy_intp cnt, double *out) { + npy_intp i; + for (i = 0; i < cnt; i++) { + out[i] = next_gauss_zig(brng_state); + } +} + +float random_gauss_zig_f(brng_t *brng_state) { + uint32_t r; + int sign; + int32_t rabs; + int idx; + float x, xx, yy; + for (;;) { + /* r = n23sb8 */ + r = next_uint32(brng_state); + idx = r & 0xff; + sign = (r >> 8) & 0x1; + rabs = (int32_t)((r >> 9) & 0x0007fffff); + x = rabs * wi_float[idx]; + if (sign & 0x1) + x = -x; + if (rabs < ki_float[idx]) + return x; /* # 99.3% of the time return here */ + if (idx == 0) { + for (;;) { + xx = -ziggurat_nor_inv_r_f * logf(next_float(brng_state)); + yy = -logf(next_float(brng_state)); + if (yy + yy > xx * xx) + return ((rabs >> 8) & 0x1) ? -(ziggurat_nor_r_f + xx) + : ziggurat_nor_r_f + xx; + } + } else { + if (((fi_float[idx - 1] - fi_float[idx]) * next_float(brng_state) + + fi_float[idx]) < exp(-0.5 * x * x)) + return x; + } + } +} + +/* +static NPY_INLINE double standard_gamma(brng_t *brng_state, double shape) { + double b, c; + double U, V, X, Y; + + if (shape == 1.0) { + return random_standard_exponential(brng_state); + } else if (shape < 1.0) { + for (;;) { + U = next_double(brng_state); + V = random_standard_exponential(brng_state); + if (U <= 1.0 - shape) { + X = pow(U, 1. / shape); + if (X <= V) { + return X; + } + } else { + Y = -log((1 - U) / shape); + X = pow(1.0 - shape + shape * Y, 1. / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1. / 3.; + c = 1. / sqrt(9 * b); + for (;;) { + do { + X = random_gauss(brng_state); + V = 1.0 + c * X; + } while (V <= 0.0); + + V = V * V * V; + U = next_double(brng_state); + if (U < 1.0 - 0.0331 * (X * X) * (X * X)) + return (b * V); + if (log(U) < 0.5 * X * X + b * (1. - V + log(V))) + return (b * V); + } + } +} + +static NPY_INLINE float standard_gamma_float(brng_t *brng_state, float shape) { + float b, c; + float U, V, X, Y; + + if (shape == 1.0f) { + return random_standard_exponential_f(brng_state); + } else if (shape < 1.0f) { + for (;;) { + U = next_float(brng_state); + V = random_standard_exponential_f(brng_state); + if (U <= 1.0f - shape) { + X = powf(U, 1.0f / shape); + if (X <= V) { + return X; + } + } else { + Y = -logf((1.0f - U) / shape); + X = powf(1.0f - shape + shape * Y, 1.0f / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1.0f / 3.0f; + c = 1.0f / sqrtf(9.0f * b); + for (;;) { + do { + X = random_gauss_f(brng_state); + V = 1.0f + c * X; + } while (V <= 0.0f); + + V = V * V * V; + U = next_float(brng_state); + if (U < 1.0f - 0.0331f * (X * X) * (X * X)) + return (b * V); + if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V))) + return (b * V); + } + } +} + + +double random_standard_gamma(brng_t *brng_state, double shape) { + return standard_gamma(brng_state, shape); +} + +float random_standard_gamma_f(brng_t *brng_state, float shape) { + return standard_gamma_float(brng_state, shape); +} +*/ + +static NPY_INLINE double standard_gamma_zig(brng_t *brng_state, double shape) { + double b, c; + double U, V, X, Y; + + if (shape == 1.0) { + return random_standard_exponential_zig(brng_state); + } else if (shape == 0.0) { + return 0.0; + } else if (shape < 1.0) { + for (;;) { + U = next_double(brng_state); + V = random_standard_exponential_zig(brng_state); + if (U <= 1.0 - shape) { + X = pow(U, 1. / shape); + if (X <= V) { + return X; + } + } else { + Y = -log((1 - U) / shape); + X = pow(1.0 - shape + shape * Y, 1. / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1. / 3.; + c = 1. / sqrt(9 * b); + for (;;) { + do { + X = random_gauss_zig(brng_state); + V = 1.0 + c * X; + } while (V <= 0.0); + + V = V * V * V; + U = next_double(brng_state); + if (U < 1.0 - 0.0331 * (X * X) * (X * X)) + return (b * V); + if (log(U) < 0.5 * X * X + b * (1. - V + log(V))) + return (b * V); + } + } +} + +static NPY_INLINE float standard_gamma_zig_f(brng_t *brng_state, float shape) { + float b, c; + float U, V, X, Y; + + if (shape == 1.0f) { + return random_standard_exponential_zig_f(brng_state); + } else if (shape == 0.0) { + return 0.0; + } else if (shape < 1.0f) { + for (;;) { + U = next_float(brng_state); + V = random_standard_exponential_zig_f(brng_state); + if (U <= 1.0f - shape) { + X = powf(U, 1.0f / shape); + if (X <= V) { + return X; + } + } else { + Y = -logf((1.0f - U) / shape); + X = powf(1.0f - shape + shape * Y, 1.0f / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1.0f / 3.0f; + c = 1.0f / sqrtf(9.0f * b); + for (;;) { + do { + X = random_gauss_zig_f(brng_state); + V = 1.0f + c * X; + } while (V <= 0.0f); + + V = V * V * V; + U = next_float(brng_state); + if (U < 1.0f - 0.0331f * (X * X) * (X * X)) + return (b * V); + if (logf(U) < 0.5f * X * X + b * (1.0f - V + logf(V))) + return (b * V); + } + } +} + +double random_standard_gamma_zig(brng_t *brng_state, double shape) { + return standard_gamma_zig(brng_state, shape); +} + +float random_standard_gamma_zig_f(brng_t *brng_state, float shape) { + return standard_gamma_zig_f(brng_state, shape); +} + +int64_t random_positive_int64(brng_t *brng_state) { + return next_uint64(brng_state) >> 1; +} + +int32_t random_positive_int32(brng_t *brng_state) { + return next_uint32(brng_state) >> 1; +} + +int64_t random_positive_int(brng_t *brng_state) { +#if ULONG_MAX <= 0xffffffffUL + return (int64_t)(next_uint32(brng_state) >> 1); +#else + return (int64_t)(next_uint64(brng_state) >> 1); +#endif +} + +uint64_t random_uint(brng_t *brng_state) { +#if ULONG_MAX <= 0xffffffffUL + return next_uint32(brng_state); +#else + return next_uint64(brng_state); +#endif +} + +/* + * log-gamma function to support some of these distributions. The + * algorithm comes from SPECFUN by Shanjie Zhang and Jianming Jin and their + * book "Computation of Special Functions", 1996, John Wiley & Sons, Inc. + */ +static double loggam(double x) { + double x0, x2, xp, gl, gl0; + int64_t k, n; + + static double a[10] = {8.333333333333333e-02, -2.777777777777778e-03, + 7.936507936507937e-04, -5.952380952380952e-04, + 8.417508417508418e-04, -1.917526917526918e-03, + 6.410256410256410e-03, -2.955065359477124e-02, + 1.796443723688307e-01, -1.39243221690590e+00}; + x0 = x; + n = 0; + if ((x == 1.0) || (x == 2.0)) { + return 0.0; + } else if (x <= 7.0) { + n = (int64_t)(7 - x); + x0 = x + n; + } + x2 = 1.0 / (x0 * x0); + xp = 2 * M_PI; + gl0 = a[9]; + for (k = 8; k >= 0; k--) { + gl0 *= x2; + gl0 += a[k]; + } + gl = gl0 / x0 + 0.5 * log(xp) + (x0 - 0.5) * log(x0) - x0; + if (x <= 7.0) { + for (k = 1; k <= n; k++) { + gl -= log(x0 - 1.0); + x0 -= 1.0; + } + } + return gl; +} + +/* +double random_normal(brng_t *brng_state, double loc, double scale) { + return loc + scale * random_gauss(brng_state); +} +*/ + +double random_normal_zig(brng_t *brng_state, double loc, double scale) { + return loc + scale * random_gauss_zig(brng_state); +} + +double random_exponential(brng_t *brng_state, double scale) { + return scale * standard_exponential_zig(brng_state); +} + +double random_uniform(brng_t *brng_state, double lower, double range) { + return lower + range * next_double(brng_state); +} + +double random_gamma(brng_t *brng_state, double shape, double scale) { + return scale * random_standard_gamma_zig(brng_state, shape); +} + +float random_gamma_float(brng_t *brng_state, float shape, float scale) { + return scale * random_standard_gamma_zig_f(brng_state, shape); +} + +double random_beta(brng_t *brng_state, double a, double b) { + double Ga, Gb; + + if ((a <= 1.0) && (b <= 1.0)) { + double U, V, X, Y; + /* Use Johnk's algorithm */ + + while (1) { + U = next_double(brng_state); + V = next_double(brng_state); + X = pow(U, 1.0 / a); + Y = pow(V, 1.0 / b); + + if ((X + Y) <= 1.0) { + if (X + Y > 0) { + return X / (X + Y); + } else { + double logX = log(U) / a; + double logY = log(V) / b; + double logM = logX > logY ? logX : logY; + logX -= logM; + logY -= logM; + + return exp(logX - log(exp(logX) + exp(logY))); + } + } + } + } else { + Ga = random_standard_gamma_zig(brng_state, a); + Gb = random_standard_gamma_zig(brng_state, b); + return Ga / (Ga + Gb); + } +} + +double random_chisquare(brng_t *brng_state, double df) { + return 2.0 * random_standard_gamma_zig(brng_state, df / 2.0); +} + +double random_f(brng_t *brng_state, double dfnum, double dfden) { + return ((random_chisquare(brng_state, dfnum) * dfden) / + (random_chisquare(brng_state, dfden) * dfnum)); +} + +double random_standard_cauchy(brng_t *brng_state) { + return random_gauss_zig(brng_state) / random_gauss_zig(brng_state); +} + +double random_pareto(brng_t *brng_state, double a) { + return exp(standard_exponential_zig(brng_state) / a) - 1; +} + +double random_weibull(brng_t *brng_state, double a) { + if (a == 0.0) { + return 0.0; + } + return pow(standard_exponential_zig(brng_state), 1. / a); +} + +double random_power(brng_t *brng_state, double a) { + return pow(1 - exp(-standard_exponential_zig(brng_state)), 1. / a); +} + +double random_laplace(brng_t *brng_state, double loc, double scale) { + double U; + + U = next_double(brng_state); + if (U < 0.5) { + U = loc + scale * log(U + U); + } else { + U = loc - scale * log(2.0 - U - U); + } + return U; +} + +double random_gumbel(brng_t *brng_state, double loc, double scale) { + double U; + + U = 1.0 - next_double(brng_state); + return loc - scale * log(-log(U)); +} + +double random_logistic(brng_t *brng_state, double loc, double scale) { + double U; + + U = next_double(brng_state); + return loc + scale * log(U / (1.0 - U)); +} + +double random_lognormal(brng_t *brng_state, double mean, double sigma) { + return exp(random_normal_zig(brng_state, mean, sigma)); +} + +double random_rayleigh(brng_t *brng_state, double mode) { + return mode * sqrt(-2.0 * log(1.0 - next_double(brng_state))); +} + +double random_standard_t(brng_t *brng_state, double df) { + double num, denom; + + num = random_gauss_zig(brng_state); + denom = random_standard_gamma_zig(brng_state, df / 2); + return sqrt(df / 2) * num / sqrt(denom); +} + +static int64_t random_poisson_mult(brng_t *brng_state, double lam) { + int64_t X; + double prod, U, enlam; + + enlam = exp(-lam); + X = 0; + prod = 1.0; + while (1) { + U = next_double(brng_state); + prod *= U; + if (prod > enlam) { + X += 1; + } else { + return X; + } + } +} + +/* + * The transformed rejection method for generating Poisson random variables + * W. Hoermann + * Insurance: Mathematics and Economics 12, 39-45 (1993) + */ +#define LS2PI 0.91893853320467267 +#define TWELFTH 0.083333333333333333333333 +static int64_t random_poisson_ptrs(brng_t *brng_state, double lam) { + int64_t k; + double U, V, slam, loglam, a, b, invalpha, vr, us; + + slam = sqrt(lam); + loglam = log(lam); + b = 0.931 + 2.53 * slam; + a = -0.059 + 0.02483 * b; + invalpha = 1.1239 + 1.1328 / (b - 3.4); + vr = 0.9277 - 3.6224 / (b - 2); + + while (1) { + U = next_double(brng_state) - 0.5; + V = next_double(brng_state); + us = 0.5 - fabs(U); + k = (int64_t)floor((2 * a / us + b) * U + lam + 0.43); + if ((us >= 0.07) && (V <= vr)) { + return k; + } + if ((k < 0) || ((us < 0.013) && (V > us))) { + continue; + } + if ((log(V) + log(invalpha) - log(a / (us * us) + b)) <= + (-lam + k * loglam - loggam(k + 1))) { + return k; + } + } +} + +int64_t random_poisson(brng_t *brng_state, double lam) { + if (lam >= 10) { + return random_poisson_ptrs(brng_state, lam); + } else if (lam == 0) { + return 0; + } else { + return random_poisson_mult(brng_state, lam); + } +} + +int64_t random_negative_binomial(brng_t *brng_state, double n, double p) { + double Y = random_gamma(brng_state, n, (1 - p) / p); + return random_poisson(brng_state, Y); +} + +int64_t random_binomial_btpe(brng_t *brng_state, int64_t n, double p, + binomial_t *binomial) { + double r, q, fm, p1, xm, xl, xr, c, laml, lamr, p2, p3, p4; + double a, u, v, s, F, rho, t, A, nrq, x1, x2, f1, f2, z, z2, w, w2, x; + int64_t m, y, k, i; + + if (!(binomial->has_binomial) || (binomial->nsave != n) || + (binomial->psave != p)) { + /* initialize */ + binomial->nsave = n; + binomial->psave = p; + binomial->has_binomial = 1; + binomial->r = r = MIN(p, 1.0 - p); + binomial->q = q = 1.0 - r; + binomial->fm = fm = n * r + r; + binomial->m = m = (int64_t)floor(binomial->fm); + binomial->p1 = p1 = floor(2.195 * sqrt(n * r * q) - 4.6 * q) + 0.5; + binomial->xm = xm = m + 0.5; + binomial->xl = xl = xm - p1; + binomial->xr = xr = xm + p1; + binomial->c = c = 0.134 + 20.5 / (15.3 + m); + a = (fm - xl) / (fm - xl * r); + binomial->laml = laml = a * (1.0 + a / 2.0); + a = (xr - fm) / (xr * q); + binomial->lamr = lamr = a * (1.0 + a / 2.0); + binomial->p2 = p2 = p1 * (1.0 + 2.0 * c); + binomial->p3 = p3 = p2 + c / laml; + binomial->p4 = p4 = p3 + c / lamr; + } else { + r = binomial->r; + q = binomial->q; + fm = binomial->fm; + m = binomial->m; + p1 = binomial->p1; + xm = binomial->xm; + xl = binomial->xl; + xr = binomial->xr; + c = binomial->c; + laml = binomial->laml; + lamr = binomial->lamr; + p2 = binomial->p2; + p3 = binomial->p3; + p4 = binomial->p4; + } + +/* sigh ... */ +Step10: + nrq = n * r * q; + u = next_double(brng_state) * p4; + v = next_double(brng_state); + if (u > p1) + goto Step20; + y = (int64_t)floor(xm - p1 * v + u); + goto Step60; + +Step20: + if (u > p2) + goto Step30; + x = xl + (u - p1) / c; + v = v * c + 1.0 - fabs(m - x + 0.5) / p1; + if (v > 1.0) + goto Step10; + y = (int64_t)floor(x); + goto Step50; + +Step30: + if (u > p3) + goto Step40; + y = (int64_t)floor(xl + log(v) / laml); + if (y < 0) + goto Step10; + v = v * (u - p2) * laml; + goto Step50; + +Step40: + y = (int64_t)floor(xr - log(v) / lamr); + if (y > n) + goto Step10; + v = v * (u - p3) * lamr; + +Step50: + k = llabs(y - m); + if ((k > 20) && (k < ((nrq) / 2.0 - 1))) + goto Step52; + + s = r / q; + a = s * (n + 1); + F = 1.0; + if (m < y) { + for (i = m + 1; i <= y; i++) { + F *= (a / i - s); + } + } else if (m > y) { + for (i = y + 1; i <= m; i++) { + F /= (a / i - s); + } + } + if (v > F) + goto Step10; + goto Step60; + +Step52: + rho = + (k / (nrq)) * ((k * (k / 3.0 + 0.625) + 0.16666666666666666) / nrq + 0.5); + t = -k * k / (2 * nrq); + A = log(v); + if (A < (t - rho)) + goto Step60; + if (A > (t + rho)) + goto Step10; + + x1 = y + 1; + f1 = m + 1; + z = n + 1 - m; + w = n - y + 1; + x2 = x1 * x1; + f2 = f1 * f1; + z2 = z * z; + w2 = w * w; + if (A > (xm * log(f1 / x1) + (n - m + 0.5) * log(z / w) + + (y - m) * log(w * r / (x1 * q)) + + (13680. - (462. - (132. - (99. - 140. / f2) / f2) / f2) / f2) / f1 / + 166320. + + (13680. - (462. - (132. - (99. - 140. / z2) / z2) / z2) / z2) / z / + 166320. + + (13680. - (462. - (132. - (99. - 140. / x2) / x2) / x2) / x2) / x1 / + 166320. + + (13680. - (462. - (132. - (99. - 140. / w2) / w2) / w2) / w2) / w / + 166320.)) { + goto Step10; + } + +Step60: + if (p > 0.5) { + y = n - y; + } + + return y; +} + +int64_t random_binomial_inversion(brng_t *brng_state, int64_t n, double p, + binomial_t *binomial) { + double q, qn, np, px, U; + int64_t X, bound; + + if (!(binomial->has_binomial) || (binomial->nsave != n) || + (binomial->psave != p)) { + binomial->nsave = n; + binomial->psave = p; + binomial->has_binomial = 1; + binomial->q = q = 1.0 - p; + binomial->r = qn = exp(n * log(q)); + binomial->c = np = n * p; + binomial->m = bound = (int64_t)MIN(n, np + 10.0 * sqrt(np * q + 1)); + } else { + q = binomial->q; + qn = binomial->r; + np = binomial->c; + bound = binomial->m; + } + X = 0; + px = qn; + U = next_double(brng_state); + while (U > px) { + X++; + if (X > bound) { + X = 0; + px = qn; + U = next_double(brng_state); + } else { + U -= px; + px = ((n - X + 1) * p * px) / (X * q); + } + } + return X; +} + +int64_t random_binomial(brng_t *brng_state, double p, int64_t n, + binomial_t *binomial) { + double q; + + if ((n == 0LL) || (p == 0.0f)) + return 0; + + if (p <= 0.5) { + if (p * n <= 30.0) { + return random_binomial_inversion(brng_state, n, p, binomial); + } else { + return random_binomial_btpe(brng_state, n, p, binomial); + } + } else { + q = 1.0 - p; + if (q * n <= 30.0) { + return n - random_binomial_inversion(brng_state, n, q, binomial); + } else { + return n - random_binomial_btpe(brng_state, n, q, binomial); + } + } +} + +double random_noncentral_chisquare(brng_t *brng_state, double df, double nonc) { + if (npy_isnan(nonc)){ + return NPY_NAN; + } + if (nonc == 0) { + return random_chisquare(brng_state, df); + } + if (1 < df) { + const double Chi2 = random_chisquare(brng_state, df - 1); + const double n = random_gauss_zig(brng_state) + sqrt(nonc); + return Chi2 + n * n; + } else { + const int64_t i = random_poisson(brng_state, nonc / 2.0); + return random_chisquare(brng_state, df + 2 * i); + } +} + +double random_noncentral_f(brng_t *brng_state, double dfnum, double dfden, + double nonc) { + double t = random_noncentral_chisquare(brng_state, dfnum, nonc) * dfden; + return t / (random_chisquare(brng_state, dfden) * dfnum); +} + +double random_wald(brng_t *brng_state, double mean, double scale) { + double U, X, Y; + double mu_2l; + + mu_2l = mean / (2 * scale); + Y = random_gauss_zig(brng_state); + Y = mean * Y * Y; + X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y)); + U = next_double(brng_state); + if (U <= mean / (mean + X)) { + return X; + } else { + return mean * mean / X; + } +} + +double random_vonmises(brng_t *brng_state, double mu, double kappa) { + double s; + double U, V, W, Y, Z; + double result, mod; + int neg; + if (npy_isnan(kappa)){ + return NPY_NAN; + } + if (kappa < 1e-8) { + return M_PI * (2 * next_double(brng_state) - 1); + } else { + /* with double precision rho is zero until 1.4e-8 */ + if (kappa < 1e-5) { + /* + * second order taylor expansion around kappa = 0 + * precise until relatively large kappas as second order is 0 + */ + s = (1. / kappa + kappa); + } else { + double r = 1 + sqrt(1 + 4 * kappa * kappa); + double rho = (r - sqrt(2 * r)) / (2 * kappa); + s = (1 + rho * rho) / (2 * rho); + } + + while (1) { + U = next_double(brng_state); + Z = cos(M_PI * U); + W = (1 + s * Z) / (s + Z); + Y = kappa * (s - W); + V = next_double(brng_state); + if ((Y * (2 - Y) - V >= 0) || (log(Y / V) + 1 - Y >= 0)) { + break; + } + } + + U = next_double(brng_state); + + result = acos(W); + if (U < 0.5) { + result = -result; + } + result += mu; + neg = (result < 0); + mod = fabs(result); + mod = (fmod(mod + M_PI, 2 * M_PI) - M_PI); + if (neg) { + mod *= -1; + } + + return mod; + } +} + +int64_t random_logseries(brng_t *brng_state, double p) { + double q, r, U, V; + int64_t result; + + r = log(1.0 - p); + + while (1) { + V = next_double(brng_state); + if (V >= p) { + return 1; + } + U = next_double(brng_state); + q = 1.0 - exp(r * U); + if (V <= q * q) { + result = (int64_t)floor(1 + log(V) / log(q)); + if (result < 1) { + continue; + } else { + return result; + } + } + if (V >= q) { + return 1; + } + return 2; + } +} + +int64_t random_geometric_search(brng_t *brng_state, double p) { + double U; + int64_t X; + double sum, prod, q; + + X = 1; + sum = prod = p; + q = 1.0 - p; + U = next_double(brng_state); + while (U > sum) { + prod *= q; + sum += prod; + X++; + } + return X; +} + +int64_t random_geometric_inversion(brng_t *brng_state, double p) { + return (int64_t)ceil(log(1.0 - next_double(brng_state)) / log(1.0 - p)); +} + +int64_t random_geometric(brng_t *brng_state, double p) { + if (p >= 0.333333333333333333333333) { + return random_geometric_search(brng_state, p); + } else { + return random_geometric_inversion(brng_state, p); + } +} + +int64_t random_zipf(brng_t *brng_state, double a) { + double T, U, V; + int64_t X; + double am1, b; + + am1 = a - 1.0; + b = pow(2.0, am1); + do { + U = 1.0 - next_double(brng_state); + V = next_double(brng_state); + X = (int64_t)floor(pow(U, -1.0 / am1)); + /* The real result may be above what can be represented in a int64. + * It will get casted to -sys.maxint-1. Since this is + * a straightforward rejection algorithm, we can just reject this value + * in the rejection condition below. This function then models a Zipf + * distribution truncated to sys.maxint. + */ + T = pow(1.0 + 1.0 / X, am1); + } while (((V * X * (T - 1.0) / (b - 1.0)) > (T / b)) || X < 1); + return X; +} + +double random_triangular(brng_t *brng_state, double left, double mode, + double right) { + double base, leftbase, ratio, leftprod, rightprod; + double U; + + base = right - left; + leftbase = mode - left; + ratio = leftbase / base; + leftprod = leftbase * base; + rightprod = (right - mode) * base; + + U = next_double(brng_state); + if (U <= ratio) { + return left + sqrt(U * leftprod); + } else { + return right - sqrt((1.0 - U) * rightprod); + } +} + +int64_t random_hypergeometric_hyp(brng_t *brng_state, int64_t good, int64_t bad, + int64_t sample) { + int64_t d1, k, z; + double d2, u, y; + + d1 = bad + good - sample; + d2 = (double)MIN(bad, good); + + y = d2; + k = sample; + while (y > 0.0) { + u = next_double(brng_state); + y -= (int64_t)floor(u + y / (d1 + k)); + k--; + if (k == 0) + break; + } + z = (int64_t)(d2 - y); + if (good > bad) + z = sample - z; + return z; +} + +/* D1 = 2*sqrt(2/e) */ +/* D2 = 3 - 2*sqrt(3/e) */ +#define D1 1.7155277699214135 +#define D2 0.8989161620588988 +int64_t random_hypergeometric_hrua(brng_t *brng_state, int64_t good, + int64_t bad, int64_t sample) { + int64_t mingoodbad, maxgoodbad, popsize, m, d9; + double d4, d5, d6, d7, d8, d10, d11; + int64_t Z; + double T, W, X, Y; + + mingoodbad = MIN(good, bad); + popsize = good + bad; + maxgoodbad = MAX(good, bad); + m = MIN(sample, popsize - sample); + d4 = ((double)mingoodbad) / popsize; + d5 = 1.0 - d4; + d6 = m * d4 + 0.5; + d7 = sqrt((double)(popsize - m) * sample * d4 * d5 / (popsize - 1) + 0.5); + d8 = D1 * d7 + D2; + d9 = (int64_t)floor((double)(m + 1) * (mingoodbad + 1) / (popsize + 2)); + d10 = (loggam(d9 + 1) + loggam(mingoodbad - d9 + 1) + loggam(m - d9 + 1) + + loggam(maxgoodbad - m + d9 + 1)); + d11 = MIN(MIN(m, mingoodbad) + 1.0, floor(d6 + 16 * d7)); + /* 16 for 16-decimal-digit precision in D1 and D2 */ + + while (1) { + X = next_double(brng_state); + Y = next_double(brng_state); + W = d6 + d8 * (Y - 0.5) / X; + + /* fast rejection: */ + if ((W < 0.0) || (W >= d11)) + continue; + + Z = (int64_t)floor(W); + T = d10 - (loggam(Z + 1) + loggam(mingoodbad - Z + 1) + loggam(m - Z + 1) + + loggam(maxgoodbad - m + Z + 1)); + + /* fast acceptance: */ + if ((X * (4.0 - X) - 3.0) <= T) + break; + + /* fast rejection: */ + if (X * (X - T) >= 1) + continue; + + if (2.0 * log(X) <= T) + break; /* acceptance */ + } + + /* this is a correction to HRUA* by Ivan Frohne in rv.py */ + if (good > bad) + Z = m - Z; + + /* another fix from rv.py to allow sample to exceed popsize/2 */ + if (m < sample) + Z = good - Z; + + return Z; +} +#undef D1 +#undef D2 + +int64_t random_hypergeometric(brng_t *brng_state, int64_t good, int64_t bad, + int64_t sample) { + if (sample > 10) { + return random_hypergeometric_hrua(brng_state, good, bad, sample); + } else { + return random_hypergeometric_hyp(brng_state, good, bad, sample); + } +} + +uint64_t random_interval(brng_t *brng_state, uint64_t max) { + uint64_t mask, value; + if (max == 0) { + return 0; + } + + mask = max; + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + + /* Search a random value in [0..mask] <= max */ + if (max <= 0xffffffffUL) { + while ((value = (next_uint32(brng_state) & mask)) > max) + ; + } else { + while ((value = (next_uint64(brng_state) & mask)) > max) + ; + } + return value; +} + +static NPY_INLINE uint64_t gen_mask(uint64_t max) { + uint64_t mask = max; + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + return mask; +} + +/* Generate 16 bit random numbers using a 32 bit buffer. */ +static NPY_INLINE uint16_t buffered_uint16(brng_t *brng_state, int *bcnt, + uint32_t *buf) { + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 1; + } else { + buf[0] >>= 16; + bcnt[0] -= 1; + } + + return (uint16_t)buf[0]; +} + +/* Generate 8 bit random numbers using a 32 bit buffer. */ +static NPY_INLINE uint8_t buffered_uint8(brng_t *brng_state, int *bcnt, + uint32_t *buf) { + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 3; + } else { + buf[0] >>= 8; + bcnt[0] -= 1; + } + + return (uint8_t)buf[0]; +} + +/* Static `masked rejection` function called by random_bounded_uint64(...) */ +static NPY_INLINE uint64_t bounded_masked_uint64(brng_t *brng_state, + uint64_t rng, uint64_t mask) { + uint64_t val; + + while ((val = (next_uint64(brng_state) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint32(...) */ +static NPY_INLINE uint32_t buffered_bounded_masked_uint32( + brng_t *brng_state, uint32_t rng, uint32_t mask, int *bcnt, uint32_t *buf) { + /* + * The buffer and buffer count are not used here but are included to allow + * this function to be templated with the similar uint8 and uint16 + * functions + */ + + uint32_t val; + + while ((val = (next_uint32(brng_state) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint16(...) */ +static NPY_INLINE uint16_t buffered_bounded_masked_uint16( + brng_t *brng_state, uint16_t rng, uint16_t mask, int *bcnt, uint32_t *buf) { + uint16_t val; + + while ((val = (buffered_uint16(brng_state, bcnt, buf) & mask)) > rng) + ; + + return val; +} + +/* Static `masked rejection` function called by + * random_buffered_bounded_uint8(...) */ +static NPY_INLINE uint8_t buffered_bounded_masked_uint8(brng_t *brng_state, + uint8_t rng, + uint8_t mask, int *bcnt, + uint32_t *buf) { + uint8_t val; + + while ((val = (buffered_uint8(brng_state, bcnt, buf) & mask)) > rng) + ; + + return val; +} + +/* Static `Lemire rejection` function called by random_bounded_uint64(...) */ +static NPY_INLINE uint64_t bounded_lemire_uint64(brng_t *brng_state, + uint64_t rng) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFFFFFFFFFFFFFFFF. When this happens `rng_excl` + * becomes zero. + */ + const uint64_t rng_excl = rng + 1; + +#if __SIZEOF_INT128__ + /* 128-bit uint available (e.g. GCC/clang). `m` is the __uint128_t scaled + * integer. */ + __uint128_t m; + uint64_t leftover; + + /* Generate a scaled random number. */ + m = ((__uint128_t)next_uint64(brng_state)) * rng_excl; + + /* Rejection sampling to remove any bias. */ + leftover = m & 0xFFFFFFFFFFFFFFFFULL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + + const uint64_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) % + * rng_excl; */ + + while (leftover < threshold) { + m = ((__uint128_t)next_uint64(brng_state)) * rng_excl; + leftover = m & 0xFFFFFFFFFFFFFFFFULL; + } + } + + return (m >> 64); +#else + /* 128-bit uint NOT available (e.g. MSVS). `m1` is the upper 64-bits of the + * scaled integer. */ + uint64_t m1; + uint64_t x; + uint64_t leftover; + + x = next_uint64(brng_state); + + /* Rejection sampling to remove any bias. */ + leftover = x * rng_excl; /* The lower 64-bits of the mult. */ + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + + const uint64_t threshold = -rng_excl % rng_excl; + /* Same as:threshold=((uint64_t)(0x10000000000000000ULLL - rng_excl)) % + * rng_excl; */ + + while (leftover < threshold) { + x = next_uint64(brng_state); + leftover = x * rng_excl; + } + } + +#if defined(_MSC_VER) && defined(_WIN64) + /* _WIN64 architecture. Use the __umulh intrinsic to calc `m1`. */ + m1 = __umulh(x, rng_excl); +#else + /* 32-bit architecture. Emulate __umulh to calc `m1`. */ + { + uint64_t x0, x1, rng_excl0, rng_excl1; + uint64_t w0, w1, w2, t; + + x0 = x & 0xFFFFFFFFULL; + x1 = x >> 32; + rng_excl0 = rng_excl & 0xFFFFFFFFULL; + rng_excl1 = rng_excl >> 32; + w0 = x0 * rng_excl0; + t = x1 * rng_excl0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 += x0 * rng_excl1; + m1 = x1 * rng_excl1 + w2 + (w1 >> 32); + } +#endif + + return m1; +#endif +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint32(...) */ +static NPY_INLINE uint32_t buffered_bounded_lemire_uint32(brng_t *brng_state, + uint32_t rng, + int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * The buffer and buffer count are not used here but are included to allow + * this function to be templated with the similar uint8 and uint16 + * functions + * + * Note: `rng` should not be 0xFFFFFFFF. When this happens `rng_excl` becomes + * zero. + */ + const uint32_t rng_excl = rng + 1; + + uint64_t m; + uint32_t leftover; + + /* Generate a scaled random number. */ + m = ((uint64_t)next_uint32(brng_state)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFFFFFFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + const uint32_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint64_t)(0x100000000ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint64_t)next_uint32(brng_state)) * rng_excl; + leftover = m & 0xFFFFFFFFUL; + } + } + + return (m >> 32); +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint16(...) */ +static NPY_INLINE uint16_t buffered_bounded_lemire_uint16(brng_t *brng_state, + uint16_t rng, + int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFFFF. When this happens `rng_excl` becomes + * zero. + */ + const uint16_t rng_excl = rng + 1; + + uint32_t m; + uint16_t leftover; + + /* Generate a scaled random number. */ + m = ((uint32_t)buffered_uint16(brng_state, bcnt, buf)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + const uint16_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint32_t)(0x10000ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint32_t)buffered_uint16(brng_state, bcnt, buf)) * rng_excl; + leftover = m & 0xFFFFUL; + } + } + + return (m >> 16); +} + +/* Static `Lemire rejection` function called by + * random_buffered_bounded_uint8(...) */ +static NPY_INLINE uint8_t buffered_bounded_lemire_uint8(brng_t *brng_state, + uint8_t rng, int *bcnt, + uint32_t *buf) { + /* + * Uses Lemire's algorithm - https://arxiv.org/abs/1805.10941 + * + * Note: `rng` should not be 0xFF. When this happens `rng_excl` becomes + * zero. + */ + const uint8_t rng_excl = rng + 1; + + uint16_t m; + uint8_t leftover; + + /* Generate a scaled random number. */ + m = ((uint16_t)buffered_uint8(brng_state, bcnt, buf)) * rng_excl; + + /* Rejection sampling to remove any bias */ + leftover = m & 0xFFUL; + + if (leftover < rng_excl) { + /* `rng_excl` is a simple upper bound for `threshold`. */ + const uint8_t threshold = -rng_excl % rng_excl; + /* Same as: threshold=((uint16_t)(0x100ULL - rng_excl)) % rng_excl; */ + + while (leftover < threshold) { + m = ((uint16_t)buffered_uint8(brng_state, bcnt, buf)) * rng_excl; + leftover = m & 0xFFUL; + } + } + + return (m >> 8); +} + +/* + * Returns a single random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint64_t random_bounded_uint64(brng_t *brng_state, uint64_t off, uint64_t rng, + uint64_t mask, bool use_masked) { + if (rng == 0) { + return off; + } else if (rng < 0xFFFFFFFFUL) { + /* Call 32-bit generator if range in 32-bit. */ + if (use_masked) { + return off + + buffered_bounded_masked_uint32(brng_state, rng, mask, NULL, NULL); + } else { + return off + buffered_bounded_lemire_uint32(brng_state, rng, NULL, NULL); + } + } else if (rng == 0xFFFFFFFFFFFFFFFFULL) { + /* Lemire64 doesn't support inclusive rng = 0xFFFFFFFFFFFFFFFF. */ + return off + next_uint64(brng_state); + } else { + if (use_masked) { + return off + bounded_masked_uint64(brng_state, rng, mask); + } else { + return off + bounded_lemire_uint64(brng_state, rng); + } + } +} + +/* + * Returns a single random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint32_t random_buffered_bounded_uint32(brng_t *brng_state, uint32_t off, + uint32_t rng, uint32_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + /* + * Unused bcnt and buf are here only to allow templating with other uint + * generators. + */ + if (rng == 0) { + return off; + } else if (rng == 0xFFFFFFFFUL) { + /* Lemire32 doesn't support inclusive rng = 0xFFFFFFFF. */ + return off + next_uint32(brng_state); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint32(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint32(brng_state, rng, bcnt, buf); + } + } +} + +/* + * Returns a single random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint16_t random_buffered_bounded_uint16(brng_t *brng_state, uint16_t off, + uint16_t rng, uint16_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + if (rng == 0) { + return off; + } else if (rng == 0xFFFFUL) { + /* Lemire16 doesn't support inclusive rng = 0xFFFF. */ + return off + buffered_uint16(brng_state, bcnt, buf); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint16(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint16(brng_state, rng, bcnt, buf); + } + } +} + +/* + * Returns a single random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +uint8_t random_buffered_bounded_uint8(brng_t *brng_state, uint8_t off, + uint8_t rng, uint8_t mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + if (rng == 0) { + return off; + } else if (rng == 0xFFUL) { + /* Lemire8 doesn't support inclusive rng = 0xFF. */ + return off + buffered_uint8(brng_state, bcnt, buf); + } else { + if (use_masked) { + return off + + buffered_bounded_masked_uint8(brng_state, rng, mask, bcnt, buf); + } else { + return off + buffered_bounded_lemire_uint8(brng_state, rng, bcnt, buf); + } + } +} + +static NPY_INLINE npy_bool buffered_bounded_bool(brng_t *brng_state, + npy_bool off, npy_bool rng, + npy_bool mask, int *bcnt, + uint32_t *buf) { + if (rng == 0) + return off; + if (!(bcnt[0])) { + buf[0] = next_uint32(brng_state); + bcnt[0] = 31; + } else { + buf[0] >>= 1; + bcnt[0] -= 1; + } + return (buf[0] & 0x00000001UL) != 0; +} + +npy_bool random_buffered_bounded_bool(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_bool mask, + bool use_masked, int *bcnt, + uint32_t *buf) { + return buffered_bounded_bool(brng_state, off, rng, mask, bcnt, buf); +} + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint64_fill(brng_t *brng_state, uint64_t off, uint64_t rng, + npy_intp cnt, bool use_masked, uint64_t *out) { + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng < 0xFFFFFFFFUL) { + uint32_t buf = 0; + int bcnt = 0; + + /* Call 32-bit generator if range in 32-bit. */ + if (use_masked) { + /* Smallest bit mask >= max */ + uint64_t mask = gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint32(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint32(brng_state, rng, &bcnt, &buf); + } + } + } else if (rng == 0xFFFFFFFFFFFFFFFFULL) { + /* Lemire64 doesn't support rng = 0xFFFFFFFFFFFFFFFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + next_uint64(brng_state); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint64_t mask = gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + bounded_masked_uint64(brng_state, rng, mask); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = off + bounded_lemire_uint64(brng_state, rng); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint32_fill(brng_t *brng_state, uint32_t off, uint32_t rng, + npy_intp cnt, bool use_masked, uint32_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFFFFFFFUL) { + /* Lemire32 doesn't support rng = 0xFFFFFFFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + next_uint32(brng_state); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint32_t mask = (uint32_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint32(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint32(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint16_fill(brng_t *brng_state, uint16_t off, uint16_t rng, + npy_intp cnt, bool use_masked, uint16_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFFFUL) { + /* Lemire16 doesn't support rng = 0xFFFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_uint16(brng_state, &bcnt, &buf); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint16_t mask = (uint16_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint16(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint16(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void random_bounded_uint8_fill(brng_t *brng_state, uint8_t off, uint8_t rng, + npy_intp cnt, bool use_masked, uint8_t *out) { + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + } else if (rng == 0xFFUL) { + /* Lemire8 doesn't support rng = 0xFF. */ + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_uint8(brng_state, &bcnt, &buf); + } + } else { + if (use_masked) { + /* Smallest bit mask >= max */ + uint8_t mask = (uint8_t)gen_mask(rng); + + for (i = 0; i < cnt; i++) { + out[i] = off + buffered_bounded_masked_uint8(brng_state, rng, mask, + &bcnt, &buf); + } + } else { + for (i = 0; i < cnt; i++) { + out[i] = + off + buffered_bounded_lemire_uint8(brng_state, rng, &bcnt, &buf); + } + } + } +} + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. + */ +void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, npy_bool rng, + npy_intp cnt, bool use_masked, npy_bool *out) { + npy_bool mask = 0; + npy_intp i; + uint32_t buf = 0; + int bcnt = 0; + + for (i = 0; i < cnt; i++) { + out[i] = buffered_bounded_bool(brng_state, off, rng, mask, &bcnt, &buf); + } +} diff --git a/numpy/random/src/distributions/distributions.h b/numpy/random/src/distributions/distributions.h new file mode 100644 index 000000000..7ca31a16c --- /dev/null +++ b/numpy/random/src/distributions/distributions.h @@ -0,0 +1,220 @@ +#ifndef _RANDOMDGEN__DISTRIBUTIONS_H_ +#define _RANDOMDGEN__DISTRIBUTIONS_H_ + +#pragma once +#include <stddef.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +typedef int bool; +#define false 0 +#define true 1 +#else +#include <stdbool.h> +#include <stdint.h> +#endif +#else +#include <stdbool.h> +#include <stdint.h> +#endif + +#include "Python.h" +#include "numpy/npy_common.h" +#include "numpy/npy_math.h" + +#ifdef _WIN32 +#if _MSC_VER == 1500 + +static NPY_INLINE int64_t llabs(int64_t x) { + int64_t o; + if (x < 0) { + o = -x; + } else { + o = x; + } + return o; +} +#endif +#endif + +#ifdef DLL_EXPORT +#define DECLDIR __declspec(dllexport) +#else +#define DECLDIR extern +#endif + +#ifndef MIN +#define MIN(x, y) (((x) < (y)) ? x : y) +#define MAX(x, y) (((x) > (y)) ? x : y) +#endif + +#ifndef M_PI +#define M_PI 3.14159265358979323846264338328 +#endif + +typedef struct s_binomial_t { + int has_binomial; /* !=0: following parameters initialized for binomial */ + double psave; + int64_t nsave; + double r; + double q; + double fm; + int64_t m; + double p1; + double xm; + double xl; + double xr; + double c; + double laml; + double lamr; + double p2; + double p3; + double p4; +} binomial_t; + +typedef struct brng { + void *state; + uint64_t (*next_uint64)(void *st); + uint32_t (*next_uint32)(void *st); + double (*next_double)(void *st); + uint64_t (*next_raw)(void *st); +} brng_t; + +/* Inline generators for internal use */ +static NPY_INLINE uint32_t next_uint32(brng_t *brng_state) { + return brng_state->next_uint32(brng_state->state); +} + +static NPY_INLINE uint64_t next_uint64(brng_t *brng_state) { + return brng_state->next_uint64(brng_state->state); +} + +static NPY_INLINE float next_float(brng_t *brng_state) { + return (next_uint32(brng_state) >> 9) * (1.0f / 8388608.0f); +} + +static NPY_INLINE double next_double(brng_t *brng_state) { + return brng_state->next_double(brng_state->state); +} + +DECLDIR float random_float(brng_t *brng_state); +DECLDIR double random_double(brng_t *brng_state); +DECLDIR void random_double_fill(brng_t *brng_state, npy_intp cnt, double *out); + +DECLDIR int64_t random_positive_int64(brng_t *brng_state); +DECLDIR int32_t random_positive_int32(brng_t *brng_state); +DECLDIR int64_t random_positive_int(brng_t *brng_state); +DECLDIR uint64_t random_uint(brng_t *brng_state); + +DECLDIR double random_standard_exponential(brng_t *brng_state); +DECLDIR void random_standard_exponential_fill(brng_t *brng_state, npy_intp cnt, + double *out); +DECLDIR float random_standard_exponential_f(brng_t *brng_state); +DECLDIR double random_standard_exponential_zig(brng_t *brng_state); +DECLDIR void random_standard_exponential_zig_fill(brng_t *brng_state, + npy_intp cnt, double *out); +DECLDIR float random_standard_exponential_zig_f(brng_t *brng_state); + +/* +DECLDIR double random_gauss(brng_t *brng_state); +DECLDIR float random_gauss_f(brng_t *brng_state); +*/ +DECLDIR double random_gauss_zig(brng_t *brng_state); +DECLDIR float random_gauss_zig_f(brng_t *brng_state); +DECLDIR void random_gauss_zig_fill(brng_t *brng_state, npy_intp cnt, + double *out); + +/* +DECLDIR double random_standard_gamma(brng_t *brng_state, double shape); +DECLDIR float random_standard_gamma_f(brng_t *brng_state, float shape); +*/ +DECLDIR double random_standard_gamma_zig(brng_t *brng_state, double shape); +DECLDIR float random_standard_gamma_zig_f(brng_t *brng_state, float shape); + +/* +DECLDIR double random_normal(brng_t *brng_state, double loc, double scale); +*/ +DECLDIR double random_normal_zig(brng_t *brng_state, double loc, double scale); + +DECLDIR double random_gamma(brng_t *brng_state, double shape, double scale); +DECLDIR float random_gamma_float(brng_t *brng_state, float shape, float scale); + +DECLDIR double random_exponential(brng_t *brng_state, double scale); +DECLDIR double random_uniform(brng_t *brng_state, double lower, double range); +DECLDIR double random_beta(brng_t *brng_state, double a, double b); +DECLDIR double random_chisquare(brng_t *brng_state, double df); +DECLDIR double random_f(brng_t *brng_state, double dfnum, double dfden); +DECLDIR double random_standard_cauchy(brng_t *brng_state); +DECLDIR double random_pareto(brng_t *brng_state, double a); +DECLDIR double random_weibull(brng_t *brng_state, double a); +DECLDIR double random_power(brng_t *brng_state, double a); +DECLDIR double random_laplace(brng_t *brng_state, double loc, double scale); +DECLDIR double random_gumbel(brng_t *brng_state, double loc, double scale); +DECLDIR double random_logistic(brng_t *brng_state, double loc, double scale); +DECLDIR double random_lognormal(brng_t *brng_state, double mean, double sigma); +DECLDIR double random_rayleigh(brng_t *brng_state, double mode); +DECLDIR double random_standard_t(brng_t *brng_state, double df); +DECLDIR double random_noncentral_chisquare(brng_t *brng_state, double df, + double nonc); +DECLDIR double random_noncentral_f(brng_t *brng_state, double dfnum, + double dfden, double nonc); +DECLDIR double random_wald(brng_t *brng_state, double mean, double scale); +DECLDIR double random_vonmises(brng_t *brng_state, double mu, double kappa); +DECLDIR double random_triangular(brng_t *brng_state, double left, double mode, + double right); + +DECLDIR int64_t random_poisson(brng_t *brng_state, double lam); +DECLDIR int64_t random_negative_binomial(brng_t *brng_state, double n, + double p); +DECLDIR int64_t random_binomial(brng_t *brng_state, double p, int64_t n, + binomial_t *binomial); +DECLDIR int64_t random_logseries(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric_search(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric_inversion(brng_t *brng_state, double p); +DECLDIR int64_t random_geometric(brng_t *brng_state, double p); +DECLDIR int64_t random_zipf(brng_t *brng_state, double a); +DECLDIR int64_t random_hypergeometric(brng_t *brng_state, int64_t good, + int64_t bad, int64_t sample); + +DECLDIR uint64_t random_interval(brng_t *brng_state, uint64_t max); + +/* Generate random uint64 numbers in closed interval [off, off + rng]. */ +DECLDIR uint64_t random_bounded_uint64(brng_t *brng_state, uint64_t off, + uint64_t rng, uint64_t mask, + bool use_masked); + +/* Generate random uint32 numbers in closed interval [off, off + rng]. */ +DECLDIR uint32_t random_buffered_bounded_uint32(brng_t *brng_state, + uint32_t off, uint32_t rng, + uint32_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint16_t random_buffered_bounded_uint16(brng_t *brng_state, + uint16_t off, uint16_t rng, + uint16_t mask, bool use_masked, + int *bcnt, uint32_t *buf); +DECLDIR uint8_t random_buffered_bounded_uint8(brng_t *brng_state, uint8_t off, + uint8_t rng, uint8_t mask, + bool use_masked, int *bcnt, + uint32_t *buf); +DECLDIR npy_bool random_buffered_bounded_bool(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_bool mask, + bool use_masked, int *bcnt, + uint32_t *buf); + +DECLDIR void random_bounded_uint64_fill(brng_t *brng_state, uint64_t off, + uint64_t rng, npy_intp cnt, + bool use_masked, uint64_t *out); +DECLDIR void random_bounded_uint32_fill(brng_t *brng_state, uint32_t off, + uint32_t rng, npy_intp cnt, + bool use_masked, uint32_t *out); +DECLDIR void random_bounded_uint16_fill(brng_t *brng_state, uint16_t off, + uint16_t rng, npy_intp cnt, + bool use_masked, uint16_t *out); +DECLDIR void random_bounded_uint8_fill(brng_t *brng_state, uint8_t off, + uint8_t rng, npy_intp cnt, + bool use_masked, uint8_t *out); +DECLDIR void random_bounded_bool_fill(brng_t *brng_state, npy_bool off, + npy_bool rng, npy_intp cnt, + bool use_masked, npy_bool *out); + +#endif diff --git a/numpy/random/src/distributions/ziggurat.h b/numpy/random/src/distributions/ziggurat.h new file mode 100644 index 000000000..7808c0e68 --- /dev/null +++ b/numpy/random/src/distributions/ziggurat.h @@ -0,0 +1,276 @@ +/* + * Constants from Julia's Ziggurat implementation + */ + +static const uint64_t ki[] = { + 0x0007799ec012f7b2, 0x0000000000000000, 0x0006045f4c7de363, + 0x0006d1aa7d5ec0a5, 0x000728fb3f60f777, 0x0007592af4e9fbc0, + 0x000777a5c0bf655d, 0x00078ca3857d2256, 0x00079bf6b0ffe58b, + 0x0007a7a34ab092ad, 0x0007b0d2f20dd1cb, 0x0007b83d3aa9cb52, + 0x0007be597614224d, 0x0007c3788631abe9, 0x0007c7d32bc192ee, + 0x0007cb9263a6e86d, 0x0007ced483edfa84, 0x0007d1b07ac0fd39, + 0x0007d437ef2da5fc, 0x0007d678b069aa6e, 0x0007d87db38c5c87, + 0x0007da4fc6a9ba62, 0x0007dbf611b37f3b, 0x0007dd7674d0f286, + 0x0007ded5ce8205f6, 0x0007e018307fb62b, 0x0007e141081bd124, + 0x0007e2533d712de8, 0x0007e3514bbd7718, 0x0007e43d54944b52, + 0x0007e5192f25ef42, 0x0007e5e67481118d, 0x0007e6a6897c1ce2, + 0x0007e75aa6c7f64c, 0x0007e803df8ee498, 0x0007e8a326eb6272, + 0x0007e93954717a28, 0x0007e9c727f8648f, 0x0007ea4d4cc85a3c, + 0x0007eacc5c4907a9, 0x0007eb44e0474cf6, 0x0007ebb754e47419, + 0x0007ec242a3d8474, 0x0007ec8bc5d69645, 0x0007ecee83d3d6e9, + 0x0007ed4cb8082f45, 0x0007eda6aee0170f, 0x0007edfcae2dfe68, + 0x0007ee4ef5dccd3e, 0x0007ee9dc08c394e, 0x0007eee9441a17c7, + 0x0007ef31b21b4fb1, 0x0007ef773846a8a7, 0x0007efba00d35a17, + 0x0007effa32ccf69f, 0x0007f037f25e1278, 0x0007f0736112d12c, + 0x0007f0ac9e145c25, 0x0007f0e3c65e1fcc, 0x0007f118f4ed8e54, + 0x0007f14c42ed0dc8, 0x0007f17dc7daa0c3, 0x0007f1ad99aac6a5, + 0x0007f1dbcce80015, 0x0007f20874cf56bf, 0x0007f233a36a3b9a, + 0x0007f25d69a604ad, 0x0007f285d7694a92, 0x0007f2acfba75e3b, + 0x0007f2d2e4720909, 0x0007f2f79f09c344, 0x0007f31b37ec883b, + 0x0007f33dbae36abc, 0x0007f35f330f08d5, 0x0007f37faaf2fa79, + 0x0007f39f2c805380, 0x0007f3bdc11f4f1c, 0x0007f3db71b83850, + 0x0007f3f846bba121, 0x0007f4144829f846, 0x0007f42f7d9a8b9d, + 0x0007f449ee420432, 0x0007f463a0f8675e, 0x0007f47c9c3ea77b, + 0x0007f494e643cd8e, 0x0007f4ac84e9c475, 0x0007f4c37dc9cd50, + 0x0007f4d9d638a432, 0x0007f4ef934a5b6a, 0x0007f504b9d5f33d, + 0x0007f5194e78b352, 0x0007f52d55994a96, 0x0007f540d36aba0c, + 0x0007f553cbef0e77, 0x0007f56642f9ec8f, 0x0007f5783c32f31e, + 0x0007f589bb17f609, 0x0007f59ac2ff1525, 0x0007f5ab5718b15a, + 0x0007f5bb7a71427c, 0x0007f5cb2ff31009, 0x0007f5da7a67cebe, + 0x0007f5e95c7a24e7, 0x0007f5f7d8b7171e, 0x0007f605f18f5ef4, + 0x0007f613a958ad0a, 0x0007f621024ed7e9, 0x0007f62dfe94f8cb, + 0x0007f63aa036777a, 0x0007f646e928065a, 0x0007f652db488f88, + 0x0007f65e786213ff, 0x0007f669c22a7d8a, 0x0007f674ba446459, + 0x0007f67f623fc8db, 0x0007f689bb9ac294, 0x0007f693c7c22481, + 0x0007f69d881217a6, 0x0007f6a6fdd6ac36, 0x0007f6b02a4c61ee, + 0x0007f6b90ea0a7f4, 0x0007f6c1abf254c0, 0x0007f6ca03521664, + 0x0007f6d215c2db82, 0x0007f6d9e43a3559, 0x0007f6e16fa0b329, + 0x0007f6e8b8d23729, 0x0007f6efc09e4569, 0x0007f6f687c84cbf, + 0x0007f6fd0f07ea09, 0x0007f703570925e2, 0x0007f709606cad03, + 0x0007f70f2bc8036f, 0x0007f714b9a5b292, 0x0007f71a0a85725d, + 0x0007f71f1edc4d9e, 0x0007f723f714c179, 0x0007f728938ed843, + 0x0007f72cf4a03fa0, 0x0007f7311a945a16, 0x0007f73505ac4bf8, + 0x0007f738b61f03bd, 0x0007f73c2c193dc0, 0x0007f73f67bd835c, + 0x0007f74269242559, 0x0007f745305b31a1, 0x0007f747bd666428, + 0x0007f74a103f12ed, 0x0007f74c28d414f5, 0x0007f74e0709a42d, + 0x0007f74faab939f9, 0x0007f75113b16657, 0x0007f75241b5a155, + 0x0007f753347e16b8, 0x0007f753ebb76b7c, 0x0007f75467027d05, + 0x0007f754a5f4199d, 0x0007f754a814b207, 0x0007f7546ce003ae, + 0x0007f753f3c4bb29, 0x0007f7533c240e92, 0x0007f75245514f41, + 0x0007f7510e91726c, 0x0007f74f971a9012, 0x0007f74dde135797, + 0x0007f74be2927971, 0x0007f749a39e051c, 0x0007f747202aba8a, + 0x0007f744571b4e3c, 0x0007f741473f9efe, 0x0007f73def53dc43, + 0x0007f73a4dff9bff, 0x0007f73661d4deaf, 0x0007f732294f003f, + 0x0007f72da2d19444, 0x0007f728cca72bda, 0x0007f723a5000367, + 0x0007f71e29f09627, 0x0007f7185970156b, 0x0007f7123156c102, + 0x0007f70baf5c1e2c, 0x0007f704d1150a23, 0x0007f6fd93f1a4e5, + 0x0007f6f5f53b10b6, 0x0007f6edf211023e, 0x0007f6e587671ce9, + 0x0007f6dcb2021679, 0x0007f6d36e749c64, 0x0007f6c9b91bf4c6, + 0x0007f6bf8e1c541b, 0x0007f6b4e95ce015, 0x0007f6a9c68356ff, + 0x0007f69e20ef5211, 0x0007f691f3b517eb, 0x0007f6853997f321, + 0x0007f677ed03ff19, 0x0007f66a08075bdc, 0x0007f65b844ab75a, + 0x0007f64c5b091860, 0x0007f63c8506d4bc, 0x0007f62bfa8798fe, + 0x0007f61ab34364b0, 0x0007f608a65a599a, 0x0007f5f5ca4737e8, + 0x0007f5e214d05b48, 0x0007f5cd7af7066e, 0x0007f5b7f0e4c2a1, + 0x0007f5a169d68fcf, 0x0007f589d80596a5, 0x0007f5712c8d0174, + 0x0007f557574c912b, 0x0007f53c46c77193, 0x0007f51fe7feb9f2, + 0x0007f5022646ecfb, 0x0007f4e2eb17ab1d, 0x0007f4c21dd4a3d1, + 0x0007f49fa38ea394, 0x0007f47b5ebb62eb, 0x0007f4552ee27473, + 0x0007f42cf03d58f5, 0x0007f4027b48549f, 0x0007f3d5a44119df, + 0x0007f3a63a8fb552, 0x0007f37408155100, 0x0007f33ed05b55ec, + 0x0007f3064f9c183e, 0x0007f2ca399c7ba1, 0x0007f28a384bb940, + 0x0007f245ea1b7a2b, 0x0007f1fcdffe8f1b, 0x0007f1ae9af758cd, + 0x0007f15a8917f27e, 0x0007f10001ccaaab, 0x0007f09e413c418a, + 0x0007f034627733d7, 0x0007efc15815b8d5, 0x0007ef43e2bf7f55, + 0x0007eeba84e31dfe, 0x0007ee237294df89, 0x0007ed7c7c170141, + 0x0007ecc2f0d95d3a, 0x0007ebf377a46782, 0x0007eb09d6deb285, + 0x0007ea00a4f17808, 0x0007e8d0d3da63d6, 0x0007e771023b0fcf, + 0x0007e5d46c2f08d8, 0x0007e3e937669691, 0x0007e195978f1176, + 0x0007deb2c0e05c1c, 0x0007db0362002a19, 0x0007d6202c151439, + 0x0007cf4b8f00a2cb, 0x0007c4fd24520efd, 0x0007b362fbf81816, + 0x00078d2d25998e24}; + +static const double wi[] = { + 1.7367254121602630e-15, 9.5586603514556339e-17, 1.2708704834810623e-16, + 1.4909740962495474e-16, 1.6658733631586268e-16, 1.8136120810119029e-16, + 1.9429720153135588e-16, 2.0589500628482093e-16, 2.1646860576895422e-16, + 2.2622940392218116e-16, 2.3532718914045892e-16, 2.4387234557428771e-16, + 2.5194879829274225e-16, 2.5962199772528103e-16, 2.6694407473648285e-16, + 2.7395729685142446e-16, 2.8069646002484804e-16, 2.8719058904113930e-16, + 2.9346417484728883e-16, 2.9953809336782113e-16, 3.0543030007192440e-16, + 3.1115636338921572e-16, 3.1672988018581815e-16, 3.2216280350549905e-16, + 3.2746570407939751e-16, 3.3264798116841710e-16, 3.3771803417353232e-16, + 3.4268340353119356e-16, 3.4755088731729758e-16, 3.5232663846002031e-16, + 3.5701624633953494e-16, 3.6162480571598339e-16, 3.6615697529653540e-16, + 3.7061702777236077e-16, 3.7500889278747798e-16, 3.7933619401549554e-16, + 3.8360228129677279e-16, 3.8781025861250247e-16, 3.9196300853257678e-16, + 3.9606321366256378e-16, 4.0011337552546690e-16, 4.0411583124143332e-16, + 4.0807276830960448e-16, 4.1198623774807442e-16, 4.1585816580828064e-16, + 4.1969036444740733e-16, 4.2348454071520708e-16, 4.2724230518899761e-16, + 4.3096517957162941e-16, 4.3465460355128760e-16, 4.3831194100854571e-16, + 4.4193848564470665e-16, 4.4553546609579137e-16, 4.4910405058828750e-16, + 4.5264535118571397e-16, 4.5616042766900381e-16, 4.5965029108849407e-16, + 4.6311590702081647e-16, 4.6655819856008752e-16, 4.6997804906941950e-16, + 4.7337630471583237e-16, 4.7675377680908526e-16, 4.8011124396270155e-16, + 4.8344945409350080e-16, 4.8676912627422087e-16, 4.9007095245229938e-16, + 4.9335559904654139e-16, 4.9662370843221783e-16, 4.9987590032409088e-16, + 5.0311277306593187e-16, 5.0633490483427195e-16, 5.0954285476338923e-16, + 5.1273716399787966e-16, 5.1591835667857364e-16, 5.1908694086703434e-16, + 5.2224340941340417e-16, 5.2538824077194543e-16, 5.2852189976823820e-16, + 5.3164483832166176e-16, 5.3475749612647295e-16, 5.3786030129452348e-16, + 5.4095367096239933e-16, 5.4403801186554671e-16, 5.4711372088173611e-16, + 5.5018118554603362e-16, 5.5324078453927836e-16, 5.5629288815190902e-16, + 5.5933785872484621e-16, 5.6237605106900435e-16, 5.6540781286489604e-16, + 5.6843348504368141e-16, 5.7145340215092040e-16, 5.7446789269419609e-16, + 5.7747727947569648e-16, 5.8048187991076857e-16, 5.8348200633338921e-16, + 5.8647796628943653e-16, 5.8947006281858718e-16, 5.9245859472561339e-16, + 5.9544385684180598e-16, 5.9842614027720281e-16, 6.0140573266426640e-16, + 6.0438291839361250e-16, 6.0735797884236057e-16, 6.1033119259564394e-16, + 6.1330283566179110e-16, 6.1627318168165963e-16, 6.1924250213258470e-16, + 6.2221106652737879e-16, 6.2517914260879998e-16, 6.2814699653988953e-16, + 6.3111489309056042e-16, 6.3408309582080600e-16, 6.3705186726088149e-16, + 6.4002146908880247e-16, 6.4299216230548961e-16, 6.4596420740788321e-16, + 6.4893786456033965e-16, 6.5191339376461587e-16, 6.5489105502874154e-16, + 6.5787110853507413e-16, 6.6085381480782587e-16, 6.6383943488035057e-16, + 6.6682823046247459e-16, 6.6982046410815579e-16, 6.7281639938375311e-16, + 6.7581630103719006e-16, 6.7882043516829803e-16, 6.8182906940062540e-16, + 6.8484247305500383e-16, 6.8786091732516637e-16, 6.9088467545571690e-16, + 6.9391402292275690e-16, 6.9694923761748294e-16, 6.9999060003307640e-16, + 7.0303839345521508e-16, 7.0609290415654822e-16, 7.0915442159548734e-16, + 7.1222323861967788e-16, 7.1529965167453030e-16, 7.1838396101720629e-16, + 7.2147647093647067e-16, 7.2457748997883870e-16, 7.2768733118146927e-16, + 7.3080631231227429e-16, 7.3393475611774048e-16, 7.3707299057898310e-16, + 7.4022134917657997e-16, 7.4338017116476479e-16, 7.4654980185558890e-16, + 7.4973059291369793e-16, 7.5292290266240584e-16, 7.5612709640179217e-16, + 7.5934354673958895e-16, 7.6257263393567558e-16, 7.6581474626104873e-16, + 7.6907028037219191e-16, 7.7233964170182985e-16, 7.7562324486711744e-16, + 7.7892151409638524e-16, 7.8223488367564108e-16, 7.8556379841610841e-16, + 7.8890871414417552e-16, 7.9227009821522709e-16, 7.9564843005293662e-16, + 7.9904420171571300e-16, 8.0245791849212591e-16, 8.0589009952726568e-16, + 8.0934127848215009e-16, 8.1281200422845008e-16, 8.1630284158098775e-16, + 8.1981437207065329e-16, 8.2334719476060504e-16, 8.2690192710884700e-16, + 8.3047920588053737e-16, 8.3407968811366288e-16, 8.3770405214202216e-16, + 8.4135299867980282e-16, 8.4502725197240968e-16, 8.4872756101861549e-16, + 8.5245470086955962e-16, 8.5620947401062333e-16, 8.5999271183276646e-16, + 8.6380527620052589e-16, 8.6764806112455816e-16, 8.7152199454736980e-16, + 8.7542804025171749e-16, 8.7936719990210427e-16, 8.8334051523084080e-16, + 8.8734907038131345e-16, 8.9139399442240861e-16, 8.9547646404950677e-16, + 8.9959770648910994e-16, 9.0375900262601175e-16, 9.0796169037400680e-16, + 9.1220716831348461e-16, 9.1649689962191353e-16, 9.2083241632623076e-16, + 9.2521532390956933e-16, 9.2964730630864167e-16, 9.3413013134252651e-16, + 9.3866565661866598e-16, 9.4325583596767065e-16, 9.4790272646517382e-16, + 9.5260849610662787e-16, 9.5737543220974496e-16, 9.6220595062948384e-16, + 9.6710260588230542e-16, 9.7206810229016259e-16, 9.7710530627072088e-16, + 9.8221725991905411e-16, 9.8740719604806711e-16, 9.9267855488079765e-16, + 9.9803500261836449e-16, 1.0034804521436181e-15, 1.0090190861637457e-15, + 1.0146553831467086e-15, 1.0203941464683124e-15, 1.0262405372613567e-15, + 1.0322001115486456e-15, 1.0382788623515399e-15, 1.0444832676000471e-15, + 1.0508203448355195e-15, 1.0572977139009890e-15, 1.0639236690676801e-15, + 1.0707072623632994e-15, 1.0776584002668106e-15, 1.0847879564403425e-15, + 1.0921079038149563e-15, 1.0996314701785628e-15, 1.1073733224935752e-15, + 1.1153497865853155e-15, 1.1235791107110833e-15, 1.1320817840164846e-15, + 1.1408809242582780e-15, 1.1500027537839792e-15, 1.1594771891449189e-15, + 1.1693385786910960e-15, 1.1796266352955801e-15, 1.1903876299282890e-15, + 1.2016759392543819e-15, 1.2135560818666897e-15, 1.2261054417450561e-15, + 1.2394179789163251e-15, 1.2536093926602567e-15, 1.2688244814255010e-15, + 1.2852479319096109e-15, 1.3031206634689985e-15, 1.3227655770195326e-15, + 1.3446300925011171e-15, 1.3693606835128518e-15, 1.3979436672775240e-15, + 1.4319989869661328e-15, 1.4744848603597596e-15, 1.5317872741611144e-15, + 1.6227698675312968e-15}; + +static const double fi[] = { + 1.0000000000000000e+00, 9.7710170126767082e-01, 9.5987909180010600e-01, + 9.4519895344229909e-01, 9.3206007595922991e-01, 9.1999150503934646e-01, + 9.0872644005213032e-01, 8.9809592189834297e-01, 8.8798466075583282e-01, + 8.7830965580891684e-01, 8.6900868803685649e-01, 8.6003362119633109e-01, + 8.5134625845867751e-01, 8.4291565311220373e-01, 8.3471629298688299e-01, + 8.2672683394622093e-01, 8.1892919160370192e-01, 8.1130787431265572e-01, + 8.0384948317096383e-01, 7.9654233042295841e-01, 7.8937614356602404e-01, + 7.8234183265480195e-01, 7.7543130498118662e-01, 7.6863731579848571e-01, + 7.6195334683679483e-01, 7.5537350650709567e-01, 7.4889244721915638e-01, + 7.4250529634015061e-01, 7.3620759812686210e-01, 7.2999526456147568e-01, + 7.2386453346862967e-01, 7.1781193263072152e-01, 7.1183424887824798e-01, + 7.0592850133275376e-01, 7.0009191813651117e-01, 6.9432191612611627e-01, + 6.8861608300467136e-01, 6.8297216164499430e-01, 6.7738803621877308e-01, + 6.7186171989708166e-01, 6.6639134390874977e-01, 6.6097514777666277e-01, + 6.5561147057969693e-01, 6.5029874311081637e-01, 6.4503548082082196e-01, + 6.3982027745305614e-01, 6.3465179928762327e-01, 6.2952877992483625e-01, + 6.2445001554702606e-01, 6.1941436060583399e-01, 6.1442072388891344e-01, + 6.0946806492577310e-01, 6.0455539069746733e-01, 5.9968175261912482e-01, + 5.9484624376798689e-01, 5.9004799633282545e-01, 5.8528617926337090e-01, + 5.8055999610079034e-01, 5.7586868297235316e-01, 5.7121150673525267e-01, + 5.6658776325616389e-01, 5.6199677581452390e-01, 5.5743789361876550e-01, + 5.5291049042583185e-01, 5.4841396325526537e-01, 5.4394773119002582e-01, + 5.3951123425695158e-01, 5.3510393238045717e-01, 5.3072530440366150e-01, + 5.2637484717168403e-01, 5.2205207467232140e-01, 5.1775651722975591e-01, + 5.1348772074732651e-01, 5.0924524599574761e-01, 5.0502866794346790e-01, + 5.0083757512614835e-01, 4.9667156905248933e-01, 4.9253026364386815e-01, + 4.8841328470545758e-01, 4.8432026942668288e-01, 4.8025086590904642e-01, + 4.7620473271950547e-01, 4.7218153846772976e-01, 4.6818096140569321e-01, + 4.6420268904817391e-01, 4.6024641781284248e-01, 4.5631185267871610e-01, + 4.5239870686184824e-01, 4.4850670150720273e-01, 4.4463556539573912e-01, + 4.4078503466580377e-01, 4.3695485254798533e-01, 4.3314476911265209e-01, + 4.2935454102944126e-01, 4.2558393133802180e-01, 4.2183270922949573e-01, + 4.1810064983784795e-01, 4.1438753404089090e-01, 4.1069314827018799e-01, + 4.0701728432947315e-01, 4.0335973922111429e-01, 3.9972031498019700e-01, + 3.9609881851583223e-01, 3.9249506145931540e-01, 3.8890886001878855e-01, + 3.8534003484007706e-01, 3.8178841087339344e-01, 3.7825381724561896e-01, + 3.7473608713789086e-01, 3.7123505766823922e-01, 3.6775056977903225e-01, + 3.6428246812900372e-01, 3.6083060098964775e-01, 3.5739482014578022e-01, + 3.5397498080007656e-01, 3.5057094148140588e-01, 3.4718256395679348e-01, + 3.4380971314685055e-01, 3.4045225704452164e-01, 3.3711006663700588e-01, + 3.3378301583071823e-01, 3.3047098137916342e-01, 3.2717384281360129e-01, + 3.2389148237639104e-01, 3.2062378495690530e-01, 3.1737063802991350e-01, + 3.1413193159633707e-01, 3.1090755812628634e-01, 3.0769741250429189e-01, + 3.0450139197664983e-01, 3.0131939610080288e-01, 2.9815132669668531e-01, + 2.9499708779996164e-01, 2.9185658561709499e-01, 2.8872972848218270e-01, + 2.8561642681550159e-01, 2.8251659308370741e-01, 2.7943014176163772e-01, + 2.7635698929566810e-01, 2.7329705406857691e-01, 2.7025025636587519e-01, + 2.6721651834356114e-01, 2.6419576399726080e-01, 2.6118791913272082e-01, + 2.5819291133761890e-01, 2.5521066995466168e-01, 2.5224112605594190e-01, + 2.4928421241852824e-01, 2.4633986350126363e-01, 2.4340801542275012e-01, + 2.4048860594050039e-01, 2.3758157443123795e-01, 2.3468686187232990e-01, + 2.3180441082433859e-01, 2.2893416541468023e-01, 2.2607607132238020e-01, + 2.2323007576391746e-01, 2.2039612748015194e-01, 2.1757417672433113e-01, + 2.1476417525117358e-01, 2.1196607630703015e-01, 2.0917983462112499e-01, + 2.0640540639788071e-01, 2.0364274931033485e-01, 2.0089182249465656e-01, + 1.9815258654577511e-01, 1.9542500351413428e-01, 1.9270903690358912e-01, + 1.9000465167046496e-01, 1.8731181422380025e-01, 1.8463049242679927e-01, + 1.8196065559952254e-01, 1.7930227452284767e-01, 1.7665532144373500e-01, + 1.7401977008183875e-01, 1.7139559563750595e-01, 1.6878277480121151e-01, + 1.6618128576448205e-01, 1.6359110823236570e-01, 1.6101222343751107e-01, + 1.5844461415592431e-01, 1.5588826472447920e-01, 1.5334316106026283e-01, + 1.5080929068184568e-01, 1.4828664273257453e-01, 1.4577520800599403e-01, + 1.4327497897351341e-01, 1.4078594981444470e-01, 1.3830811644855071e-01, + 1.3584147657125373e-01, 1.3338602969166913e-01, 1.3094177717364430e-01, + 1.2850872227999952e-01, 1.2608687022018586e-01, 1.2367622820159654e-01, + 1.2127680548479021e-01, 1.1888861344290998e-01, 1.1651166562561080e-01, + 1.1414597782783835e-01, 1.1179156816383801e-01, 1.0944845714681163e-01, + 1.0711666777468364e-01, 1.0479622562248690e-01, 1.0248715894193508e-01, + 1.0018949876880981e-01, 9.7903279038862284e-02, 9.5628536713008819e-02, + 9.3365311912690860e-02, 9.1113648066373634e-02, 8.8873592068275789e-02, + 8.6645194450557961e-02, 8.4428509570353374e-02, 8.2223595813202863e-02, + 8.0030515814663056e-02, 7.7849336702096039e-02, 7.5680130358927067e-02, + 7.3522973713981268e-02, 7.1377949058890375e-02, 6.9245144397006769e-02, + 6.7124653827788497e-02, 6.5016577971242842e-02, 6.2921024437758113e-02, + 6.0838108349539864e-02, 5.8767952920933758e-02, 5.6710690106202902e-02, + 5.4666461324888914e-02, 5.2635418276792176e-02, 5.0617723860947761e-02, + 4.8613553215868521e-02, 4.6623094901930368e-02, 4.4646552251294443e-02, + 4.2684144916474431e-02, 4.0736110655940933e-02, 3.8802707404526113e-02, + 3.6884215688567284e-02, 3.4980941461716084e-02, 3.3093219458578522e-02, + 3.1221417191920245e-02, 2.9365939758133314e-02, 2.7527235669603082e-02, + 2.5705804008548896e-02, 2.3902203305795882e-02, 2.2117062707308864e-02, + 2.0351096230044517e-02, 1.8605121275724643e-02, 1.6880083152543166e-02, + 1.5177088307935325e-02, 1.3497450601739880e-02, 1.1842757857907888e-02, + 1.0214971439701471e-02, 8.6165827693987316e-03, 7.0508754713732268e-03, + 5.5224032992509968e-03, 4.0379725933630305e-03, 2.6090727461021627e-03, + 1.2602859304985975e-03}; + +static const double ziggurat_nor_r = 3.6541528853610087963519472518; +static const double ziggurat_nor_inv_r = + 0.27366123732975827203338247596; // 1.0 / ziggurat_nor_r; +static const double ziggurat_exp_r = 7.6971174701310497140446280481; + +static const float ziggurat_nor_r_f = 3.6541528853610087963519472518f; +static const float ziggurat_nor_inv_r_f = 0.27366123732975827203338247596f; +static const float ziggurat_exp_r_f = 7.6971174701310497140446280481f; diff --git a/numpy/random/src/distributions/ziggurat_constants.h b/numpy/random/src/distributions/ziggurat_constants.h new file mode 100644 index 000000000..17eccec0f --- /dev/null +++ b/numpy/random/src/distributions/ziggurat_constants.h @@ -0,0 +1,1196 @@ +static const uint64_t ki_double[] = { + 0x000EF33D8025EF6AULL, 0x0000000000000000ULL, 0x000C08BE98FBC6A8ULL, + 0x000DA354FABD8142ULL, 0x000E51F67EC1EEEAULL, 0x000EB255E9D3F77EULL, + 0x000EEF4B817ECAB9ULL, 0x000F19470AFA44AAULL, 0x000F37ED61FFCB18ULL, + 0x000F4F469561255CULL, 0x000F61A5E41BA396ULL, 0x000F707A755396A4ULL, + 0x000F7CB2EC28449AULL, 0x000F86F10C6357D3ULL, 0x000F8FA6578325DEULL, + 0x000F9724C74DD0DAULL, 0x000F9DA907DBF509ULL, 0x000FA360F581FA74ULL, + 0x000FA86FDE5B4BF8ULL, 0x000FACF160D354DCULL, 0x000FB0FB6718B90FULL, + 0x000FB49F8D5374C6ULL, 0x000FB7EC2366FE77ULL, 0x000FBAECE9A1E50EULL, + 0x000FBDAB9D040BEDULL, 0x000FC03060FF6C57ULL, 0x000FC2821037A248ULL, + 0x000FC4A67AE25BD1ULL, 0x000FC6A2977AEE31ULL, 0x000FC87AA92896A4ULL, + 0x000FCA325E4BDE85ULL, 0x000FCBCCE902231AULL, 0x000FCD4D12F839C4ULL, + 0x000FCEB54D8FEC99ULL, 0x000FD007BF1DC930ULL, 0x000FD1464DD6C4E6ULL, + 0x000FD272A8E2F450ULL, 0x000FD38E4FF0C91EULL, 0x000FD49A9990B478ULL, + 0x000FD598B8920F53ULL, 0x000FD689C08E99ECULL, 0x000FD76EA9C8E832ULL, + 0x000FD848547B08E8ULL, 0x000FD9178BAD2C8CULL, 0x000FD9DD07A7ADD2ULL, + 0x000FDA9970105E8CULL, 0x000FDB4D5DC02E20ULL, 0x000FDBF95C5BFCD0ULL, + 0x000FDC9DEBB99A7DULL, 0x000FDD3B8118729DULL, 0x000FDDD288342F90ULL, + 0x000FDE6364369F64ULL, 0x000FDEEE708D514EULL, 0x000FDF7401A6B42EULL, + 0x000FDFF46599ED40ULL, 0x000FE06FE4BC24F2ULL, 0x000FE0E6C225A258ULL, + 0x000FE1593C28B84CULL, 0x000FE1C78CBC3F99ULL, 0x000FE231E9DB1CAAULL, + 0x000FE29885DA1B91ULL, 0x000FE2FB8FB54186ULL, 0x000FE35B33558D4AULL, + 0x000FE3B799D0002AULL, 0x000FE410E99EAD7FULL, 0x000FE46746D47734ULL, + 0x000FE4BAD34C095CULL, 0x000FE50BAED29524ULL, 0x000FE559F74EBC78ULL, + 0x000FE5A5C8E41212ULL, 0x000FE5EF3E138689ULL, 0x000FE6366FD91078ULL, + 0x000FE67B75C6D578ULL, 0x000FE6BE661E11AAULL, 0x000FE6FF55E5F4F2ULL, + 0x000FE73E5900A702ULL, 0x000FE77B823E9E39ULL, 0x000FE7B6E37070A2ULL, + 0x000FE7F08D774243ULL, 0x000FE8289053F08CULL, 0x000FE85EFB35173AULL, + 0x000FE893DC840864ULL, 0x000FE8C741F0CEBCULL, 0x000FE8F9387D4EF6ULL, + 0x000FE929CC879B1DULL, 0x000FE95909D388EAULL, 0x000FE986FB939AA2ULL, + 0x000FE9B3AC714866ULL, 0x000FE9DF2694B6D5ULL, 0x000FEA0973ABE67CULL, + 0x000FEA329CF166A4ULL, 0x000FEA5AAB32952CULL, 0x000FEA81A6D5741AULL, + 0x000FEAA797DE1CF0ULL, 0x000FEACC85F3D920ULL, 0x000FEAF07865E63CULL, + 0x000FEB13762FEC13ULL, 0x000FEB3585FE2A4AULL, 0x000FEB56AE3162B4ULL, + 0x000FEB76F4E284FAULL, 0x000FEB965FE62014ULL, 0x000FEBB4F4CF9D7CULL, + 0x000FEBD2B8F449D0ULL, 0x000FEBEFB16E2E3EULL, 0x000FEC0BE31EBDE8ULL, + 0x000FEC2752B15A15ULL, 0x000FEC42049DAFD3ULL, 0x000FEC5BFD29F196ULL, + 0x000FEC75406CEEF4ULL, 0x000FEC8DD2500CB4ULL, 0x000FECA5B6911F12ULL, + 0x000FECBCF0C427FEULL, 0x000FECD38454FB15ULL, 0x000FECE97488C8B3ULL, + 0x000FECFEC47F91B7ULL, 0x000FED1377358528ULL, 0x000FED278F844903ULL, + 0x000FED3B10242F4CULL, 0x000FED4DFBAD586EULL, 0x000FED605498C3DDULL, + 0x000FED721D414FE8ULL, 0x000FED8357E4A982ULL, 0x000FED9406A42CC8ULL, + 0x000FEDA42B85B704ULL, 0x000FEDB3C8746AB4ULL, 0x000FEDC2DF416652ULL, + 0x000FEDD171A46E52ULL, 0x000FEDDF813C8AD3ULL, 0x000FEDED0F909980ULL, + 0x000FEDFA1E0FD414ULL, 0x000FEE06AE124BC4ULL, 0x000FEE12C0D95A06ULL, + 0x000FEE1E579006E0ULL, 0x000FEE29734B6524ULL, 0x000FEE34150AE4BCULL, + 0x000FEE3E3DB89B3CULL, 0x000FEE47EE2982F4ULL, 0x000FEE51271DB086ULL, + 0x000FEE59E9407F41ULL, 0x000FEE623528B42EULL, 0x000FEE6A0B5897F1ULL, + 0x000FEE716C3E077AULL, 0x000FEE7858327B82ULL, 0x000FEE7ECF7B06BAULL, + 0x000FEE84D2484AB2ULL, 0x000FEE8A60B66343ULL, 0x000FEE8F7ACCC851ULL, + 0x000FEE94207E25DAULL, 0x000FEE9851A829EAULL, 0x000FEE9C0E13485CULL, + 0x000FEE9F557273F4ULL, 0x000FEEA22762CCAEULL, 0x000FEEA4836B42ACULL, + 0x000FEEA668FC2D71ULL, 0x000FEEA7D76ED6FAULL, 0x000FEEA8CE04FA0AULL, + 0x000FEEA94BE8333BULL, 0x000FEEA950296410ULL, 0x000FEEA8D9C0075EULL, + 0x000FEEA7E7897654ULL, 0x000FEEA678481D24ULL, 0x000FEEA48AA29E83ULL, + 0x000FEEA21D22E4DAULL, 0x000FEE9F2E352024ULL, 0x000FEE9BBC26AF2EULL, + 0x000FEE97C524F2E4ULL, 0x000FEE93473C0A3AULL, 0x000FEE8E40557516ULL, + 0x000FEE88AE369C7AULL, 0x000FEE828E7F3DFDULL, 0x000FEE7BDEA7B888ULL, + 0x000FEE749BFF37FFULL, 0x000FEE6CC3A9BD5EULL, 0x000FEE64529E007EULL, + 0x000FEE5B45A32888ULL, 0x000FEE51994E57B6ULL, 0x000FEE474A0006CFULL, + 0x000FEE3C53E12C50ULL, 0x000FEE30B2E02AD8ULL, 0x000FEE2462AD8205ULL, + 0x000FEE175EB83C5AULL, 0x000FEE09A22A1447ULL, 0x000FEDFB27E349CCULL, + 0x000FEDEBEA76216CULL, 0x000FEDDBE422047EULL, 0x000FEDCB0ECE39D3ULL, + 0x000FEDB964042CF4ULL, 0x000FEDA6DCE938C9ULL, 0x000FED937237E98DULL, + 0x000FED7F1C38A836ULL, 0x000FED69D2B9C02BULL, 0x000FED538D06AE00ULL, + 0x000FED3C41DEA422ULL, 0x000FED23E76A2FD8ULL, 0x000FED0A732FE644ULL, + 0x000FECEFDA07FE34ULL, 0x000FECD4100EB7B8ULL, 0x000FECB708956EB4ULL, + 0x000FEC98B61230C1ULL, 0x000FEC790A0DA978ULL, 0x000FEC57F50F31FEULL, + 0x000FEC356686C962ULL, 0x000FEC114CB4B335ULL, 0x000FEBEB948E6FD0ULL, + 0x000FEBC429A0B692ULL, 0x000FEB9AF5EE0CDCULL, 0x000FEB6FE1C98542ULL, + 0x000FEB42D3AD1F9EULL, 0x000FEB13B00B2D4BULL, 0x000FEAE2591A02E9ULL, + 0x000FEAAEAE992257ULL, 0x000FEA788D8EE326ULL, 0x000FEA3FCFFD73E5ULL, + 0x000FEA044C8DD9F6ULL, 0x000FE9C5D62F563BULL, 0x000FE9843BA947A4ULL, + 0x000FE93F471D4728ULL, 0x000FE8F6BD76C5D6ULL, 0x000FE8AA5DC4E8E6ULL, + 0x000FE859E07AB1EAULL, 0x000FE804F690A940ULL, 0x000FE7AB488233C0ULL, + 0x000FE74C751F6AA5ULL, 0x000FE6E8102AA202ULL, 0x000FE67DA0B6ABD8ULL, + 0x000FE60C9F38307EULL, 0x000FE5947338F742ULL, 0x000FE51470977280ULL, + 0x000FE48BD436F458ULL, 0x000FE3F9BFFD1E37ULL, 0x000FE35D35EEB19CULL, + 0x000FE2B5122FE4FEULL, 0x000FE20003995557ULL, 0x000FE13C82788314ULL, + 0x000FE068C4EE67B0ULL, 0x000FDF82B02B71AAULL, 0x000FDE87C57EFEAAULL, + 0x000FDD7509C63BFDULL, 0x000FDC46E529BF13ULL, 0x000FDAF8F82E0282ULL, + 0x000FD985E1B2BA75ULL, 0x000FD7E6EF48CF04ULL, 0x000FD613ADBD650BULL, + 0x000FD40149E2F012ULL, 0x000FD1A1A7B4C7ACULL, 0x000FCEE204761F9EULL, + 0x000FCBA8D85E11B2ULL, 0x000FC7D26ECD2D22ULL, 0x000FC32B2F1E22EDULL, + 0x000FBD6581C0B83AULL, 0x000FB606C4005434ULL, 0x000FAC40582A2874ULL, + 0x000F9E971E014598ULL, 0x000F89FA48A41DFCULL, 0x000F66C5F7F0302CULL, + 0x000F1A5A4B331C4AULL}; + +static const double wi_double[] = { + 8.68362706080130616677e-16, 4.77933017572773682428e-17, + 6.35435241740526230246e-17, 7.45487048124769627714e-17, + 8.32936681579309972857e-17, 9.06806040505948228243e-17, + 9.71486007656776183958e-17, 1.02947503142410192108e-16, + 1.08234302884476839838e-16, 1.13114701961090307945e-16, + 1.17663594570229211411e-16, 1.21936172787143633280e-16, + 1.25974399146370927864e-16, 1.29810998862640315416e-16, + 1.33472037368241227547e-16, 1.36978648425712032797e-16, + 1.40348230012423820659e-16, 1.43595294520569430270e-16, + 1.46732087423644219083e-16, 1.49769046683910367425e-16, + 1.52715150035961979750e-16, 1.55578181694607639484e-16, + 1.58364940092908853989e-16, 1.61081401752749279325e-16, + 1.63732852039698532012e-16, 1.66323990584208352778e-16, + 1.68859017086765964015e-16, 1.71341701765596607184e-16, + 1.73775443658648593310e-16, 1.76163319230009959832e-16, + 1.78508123169767272927e-16, 1.80812402857991522674e-16, + 1.83078487648267501776e-16, 1.85308513886180189386e-16, + 1.87504446393738816849e-16, 1.89668097007747596212e-16, + 1.91801140648386198029e-16, 1.93905129306251037069e-16, + 1.95981504266288244037e-16, 1.98031606831281739736e-16, + 2.00056687762733300198e-16, 2.02057915620716538808e-16, + 2.04036384154802118313e-16, 2.05993118874037063144e-16, + 2.07929082904140197311e-16, 2.09845182223703516690e-16, + 2.11742270357603418769e-16, 2.13621152594498681022e-16, + 2.15482589785814580926e-16, 2.17327301775643674990e-16, + 2.19155970504272708519e-16, 2.20969242822353175995e-16, + 2.22767733047895534948e-16, 2.24552025294143552381e-16, + 2.26322675592856786566e-16, 2.28080213834501706782e-16, + 2.29825145544246839061e-16, 2.31557953510408037008e-16, + 2.33279099280043561128e-16, 2.34989024534709550938e-16, + 2.36688152357916037468e-16, 2.38376888404542434981e-16, + 2.40055621981350627349e-16, 2.41724727046750252175e-16, + 2.43384563137110286400e-16, 2.45035476226149539878e-16, + 2.46677799523270498158e-16, 2.48311854216108767769e-16, + 2.49937950162045242375e-16, 2.51556386532965786439e-16, + 2.53167452417135826983e-16, 2.54771427381694417303e-16, + 2.56368581998939683749e-16, 2.57959178339286723500e-16, + 2.59543470433517070146e-16, 2.61121704706701939097e-16, + 2.62694120385972564623e-16, 2.64260949884118951286e-16, + 2.65822419160830680292e-16, 2.67378748063236329361e-16, + 2.68930150647261591777e-16, 2.70476835481199518794e-16, + 2.72019005932773206655e-16, 2.73556860440867908686e-16, + 2.75090592773016664571e-16, 2.76620392269639032183e-16, + 2.78146444075954410103e-16, 2.79668929362423005309e-16, + 2.81188025534502074329e-16, 2.82703906432447923059e-16, + 2.84216742521840606520e-16, 2.85726701075460149289e-16, + 2.87233946347097994381e-16, 2.88738639737848191815e-16, + 2.90240939955384233230e-16, 2.91741003166694553259e-16, + 2.93238983144718163965e-16, 2.94735031409293489611e-16, + 2.96229297362806647792e-16, 2.97721928420902891115e-16, + 2.99213070138601307081e-16, 3.00702866332133102993e-16, + 3.02191459196806151971e-16, 3.03678989421180184427e-16, + 3.05165596297821922381e-16, 3.06651417830895451744e-16, + 3.08136590840829717032e-16, 3.09621251066292253306e-16, + 3.11105533263689296831e-16, 3.12589571304399892784e-16, + 3.14073498269944617203e-16, 3.15557446545280064031e-16, + 3.17041547910402852545e-16, 3.18525933630440648871e-16, + 3.20010734544401137886e-16, 3.21496081152744704901e-16, + 3.22982103703941557538e-16, 3.24468932280169778077e-16, + 3.25956696882307838340e-16, 3.27445527514370671802e-16, + 3.28935554267536967851e-16, 3.30426907403912838589e-16, + 3.31919717440175233652e-16, 3.33414115231237245918e-16, + 3.34910232054077845412e-16, 3.36408199691876507948e-16, + 3.37908150518594979994e-16, 3.39410217584148914282e-16, + 3.40914534700312603713e-16, 3.42421236527501816058e-16, + 3.43930458662583133920e-16, 3.45442337727858401604e-16, + 3.46957011461378353333e-16, 3.48474618808741370700e-16, + 3.49995300016538099813e-16, 3.51519196727607440975e-16, + 3.53046452078274009054e-16, 3.54577210797743572160e-16, + 3.56111619309838843415e-16, 3.57649825837265051035e-16, + 3.59191980508602994994e-16, 3.60738235468235137839e-16, + 3.62288744989419151904e-16, 3.63843665590734438546e-16, + 3.65403156156136995766e-16, 3.66967378058870090021e-16, + 3.68536495289491401456e-16, 3.70110674588289834952e-16, + 3.71690085582382297792e-16, 3.73274900927794352614e-16, + 3.74865296456848868882e-16, 3.76461451331202869131e-16, + 3.78063548200896037651e-16, 3.79671773369794425924e-16, + 3.81286316967837738238e-16, 3.82907373130524317507e-16, + 3.84535140186095955858e-16, 3.86169820850914927119e-16, + 3.87811622433558721164e-16, 3.89460757048192620674e-16, + 3.91117441837820542060e-16, 3.92781899208054153270e-16, + 3.94454357072087711446e-16, 3.96135049107613542983e-16, + 3.97824215026468259474e-16, 3.99522100857856502444e-16, + 4.01228959246062907451e-16, 4.02945049763632792393e-16, + 4.04670639241074995115e-16, 4.06406002114225038723e-16, + 4.08151420790493873480e-16, 4.09907186035326643447e-16, + 4.11673597380302570170e-16, 4.13450963554423599878e-16, + 4.15239602940268833891e-16, 4.17039844056831587498e-16, + 4.18852026071011229572e-16, 4.20676499339901510978e-16, + 4.22513625986204937320e-16, 4.24363780509307796137e-16, + 4.26227350434779809917e-16, 4.28104737005311666397e-16, + 4.29996355916383230161e-16, 4.31902638100262944617e-16, + 4.33824030562279080411e-16, 4.35760997273684900553e-16, + 4.37714020125858747008e-16, 4.39683599951052137423e-16, + 4.41670257615420348435e-16, 4.43674535190656726604e-16, + 4.45696997211204306674e-16, 4.47738232024753387312e-16, + 4.49798853244554968009e-16, 4.51879501313005876278e-16, + 4.53980845187003400947e-16, 4.56103584156742206384e-16, + 4.58248449810956667052e-16, 4.60416208163115281428e-16, + 4.62607661954784567754e-16, 4.64823653154320737780e-16, + 4.67065065671263059081e-16, 4.69332828309332890697e-16, + 4.71627917983835129766e-16, 4.73951363232586715165e-16, + 4.76304248053313737663e-16, 4.78687716104872284247e-16, + 4.81102975314741720538e-16, 4.83551302941152515162e-16, + 4.86034051145081195402e-16, 4.88552653135360343280e-16, + 4.91108629959526955862e-16, 4.93703598024033454728e-16, + 4.96339277440398725619e-16, 4.99017501309182245754e-16, + 5.01740226071808946011e-16, 5.04509543081872748637e-16, + 5.07327691573354207058e-16, 5.10197073234156184149e-16, + 5.13120268630678373200e-16, 5.16100055774322824569e-16, + 5.19139431175769859873e-16, 5.22241633800023428760e-16, + 5.25410172417759732697e-16, 5.28648856950494511482e-16, + 5.31961834533840037535e-16, 5.35353631181649688145e-16, + 5.38829200133405320160e-16, 5.42393978220171234073e-16, + 5.46053951907478041166e-16, 5.49815735089281410703e-16, + 5.53686661246787600374e-16, 5.57674893292657647836e-16, + 5.61789555355541665830e-16, 5.66040892008242216739e-16, + 5.70440462129138908417e-16, 5.75001376891989523684e-16, + 5.79738594572459365014e-16, 5.84669289345547900201e-16, + 5.89813317647789942685e-16, 5.95193814964144415532e-16, + 6.00837969627190832234e-16, 6.06778040933344851394e-16, + 6.13052720872528159123e-16, 6.19708989458162555387e-16, + 6.26804696330128439415e-16, 6.34412240712750598627e-16, + 6.42623965954805540945e-16, 6.51560331734499356881e-16, + 6.61382788509766415145e-16, 6.72315046250558662913e-16, + 6.84680341756425875856e-16, 6.98971833638761995415e-16, + 7.15999493483066421560e-16, 7.37242430179879890722e-16, + 7.65893637080557275482e-16, 8.11384933765648418565e-16}; + +static const double fi_double[] = { + 1.00000000000000000000e+00, 9.77101701267671596263e-01, + 9.59879091800106665211e-01, 9.45198953442299649730e-01, + 9.32060075959230460718e-01, 9.19991505039347012840e-01, + 9.08726440052130879366e-01, 8.98095921898343418910e-01, + 8.87984660755833377088e-01, 8.78309655808917399966e-01, + 8.69008688036857046555e-01, 8.60033621196331532488e-01, + 8.51346258458677951353e-01, 8.42915653112204177333e-01, + 8.34716292986883434679e-01, 8.26726833946221373317e-01, + 8.18929191603702366642e-01, 8.11307874312656274185e-01, + 8.03849483170964274059e-01, 7.96542330422958966274e-01, + 7.89376143566024590648e-01, 7.82341832654802504798e-01, + 7.75431304981187174974e-01, 7.68637315798486264740e-01, + 7.61953346836795386565e-01, 7.55373506507096115214e-01, + 7.48892447219156820459e-01, 7.42505296340151055290e-01, + 7.36207598126862650112e-01, 7.29995264561476231435e-01, + 7.23864533468630222401e-01, 7.17811932630721960535e-01, + 7.11834248878248421200e-01, 7.05928501332754310127e-01, + 7.00091918136511615067e-01, 6.94321916126116711609e-01, + 6.88616083004671808432e-01, 6.82972161644994857355e-01, + 6.77388036218773526009e-01, 6.71861719897082099173e-01, + 6.66391343908750100056e-01, 6.60975147776663107813e-01, + 6.55611470579697264149e-01, 6.50298743110816701574e-01, + 6.45035480820822293424e-01, 6.39820277453056585060e-01, + 6.34651799287623608059e-01, 6.29528779924836690007e-01, + 6.24450015547026504592e-01, 6.19414360605834324325e-01, + 6.14420723888913888899e-01, 6.09468064925773433949e-01, + 6.04555390697467776029e-01, 5.99681752619125263415e-01, + 5.94846243767987448159e-01, 5.90047996332826008015e-01, + 5.85286179263371453274e-01, 5.80559996100790898232e-01, + 5.75868682972353718164e-01, 5.71211506735253227163e-01, + 5.66587763256164445025e-01, 5.61996775814524340831e-01, + 5.57437893618765945014e-01, 5.52910490425832290562e-01, + 5.48413963255265812791e-01, 5.43947731190026262382e-01, + 5.39511234256952132426e-01, 5.35103932380457614215e-01, + 5.30725304403662057062e-01, 5.26374847171684479008e-01, + 5.22052074672321841931e-01, 5.17756517229756352272e-01, + 5.13487720747326958914e-01, 5.09245245995747941592e-01, + 5.05028667943468123624e-01, 5.00837575126148681903e-01, + 4.96671569052489714213e-01, 4.92530263643868537748e-01, + 4.88413284705458028423e-01, 4.84320269426683325253e-01, + 4.80250865909046753544e-01, 4.76204732719505863248e-01, + 4.72181538467730199660e-01, 4.68180961405693596422e-01, + 4.64202689048174355069e-01, 4.60246417812842867345e-01, + 4.56311852678716434184e-01, 4.52398706861848520777e-01, + 4.48506701507203064949e-01, 4.44635565395739396077e-01, + 4.40785034665803987508e-01, 4.36954852547985550526e-01, + 4.33144769112652261445e-01, 4.29354541029441427735e-01, + 4.25583931338021970170e-01, 4.21832709229495894654e-01, + 4.18100649837848226120e-01, 4.14387534040891125642e-01, + 4.10693148270188157500e-01, 4.07017284329473372217e-01, + 4.03359739221114510510e-01, 3.99720314980197222177e-01, + 3.96098818515832451492e-01, 3.92495061459315619512e-01, + 3.88908860018788715696e-01, 3.85340034840077283462e-01, + 3.81788410873393657674e-01, 3.78253817245619183840e-01, + 3.74736087137891138443e-01, 3.71235057668239498696e-01, + 3.67750569779032587814e-01, 3.64282468129004055601e-01, + 3.60830600989648031529e-01, 3.57394820145780500731e-01, + 3.53974980800076777232e-01, 3.50570941481406106455e-01, + 3.47182563956793643900e-01, 3.43809713146850715049e-01, + 3.40452257044521866547e-01, 3.37110066637006045021e-01, + 3.33783015830718454708e-01, 3.30470981379163586400e-01, + 3.27173842813601400970e-01, 3.23891482376391093290e-01, + 3.20623784956905355514e-01, 3.17370638029913609834e-01, + 3.14131931596337177215e-01, 3.10907558126286509559e-01, + 3.07697412504292056035e-01, 3.04501391976649993243e-01, + 3.01319396100803049698e-01, 2.98151326696685481377e-01, + 2.94997087799961810184e-01, 2.91856585617095209972e-01, + 2.88729728482182923521e-01, 2.85616426815501756042e-01, + 2.82516593083707578948e-01, 2.79430141761637940157e-01, + 2.76356989295668320494e-01, 2.73297054068577072172e-01, + 2.70250256365875463072e-01, 2.67216518343561471038e-01, + 2.64195763997261190426e-01, 2.61187919132721213522e-01, + 2.58192911337619235290e-01, 2.55210669954661961700e-01, + 2.52241126055942177508e-01, 2.49284212418528522415e-01, + 2.46339863501263828249e-01, 2.43408015422750312329e-01, + 2.40488605940500588254e-01, 2.37581574431238090606e-01, + 2.34686861872330010392e-01, 2.31804410824338724684e-01, + 2.28934165414680340644e-01, 2.26076071322380278694e-01, + 2.23230075763917484855e-01, 2.20396127480151998723e-01, + 2.17574176724331130872e-01, 2.14764175251173583536e-01, + 2.11966076307030182324e-01, 2.09179834621125076977e-01, + 2.06405406397880797353e-01, 2.03642749310334908452e-01, + 2.00891822494656591136e-01, 1.98152586545775138971e-01, + 1.95425003514134304483e-01, 1.92709036903589175926e-01, + 1.90004651670464985713e-01, 1.87311814223800304768e-01, + 1.84630492426799269756e-01, 1.81960655599522513892e-01, + 1.79302274522847582272e-01, 1.76655321443734858455e-01, + 1.74019770081838553999e-01, 1.71395595637505754327e-01, + 1.68782774801211288285e-01, 1.66181285764481906364e-01, + 1.63591108232365584074e-01, 1.61012223437511009516e-01, + 1.58444614155924284882e-01, 1.55888264724479197465e-01, + 1.53343161060262855866e-01, 1.50809290681845675763e-01, + 1.48286642732574552861e-01, 1.45775208005994028060e-01, + 1.43274978973513461566e-01, 1.40785949814444699690e-01, + 1.38308116448550733057e-01, 1.35841476571253755301e-01, + 1.33386029691669155683e-01, 1.30941777173644358090e-01, + 1.28508722279999570981e-01, 1.26086870220185887081e-01, + 1.23676228201596571932e-01, 1.21276805484790306533e-01, + 1.18888613442910059947e-01, 1.16511665625610869035e-01, + 1.14145977827838487895e-01, 1.11791568163838089811e-01, + 1.09448457146811797824e-01, 1.07116667774683801961e-01, + 1.04796225622487068629e-01, 1.02487158941935246892e-01, + 1.00189498768810017482e-01, 9.79032790388624646338e-02, + 9.56285367130089991594e-02, 9.33653119126910124859e-02, + 9.11136480663737591268e-02, 8.88735920682758862021e-02, + 8.66451944505580717859e-02, 8.44285095703534715916e-02, + 8.22235958132029043366e-02, 8.00305158146630696292e-02, + 7.78493367020961224423e-02, 7.56801303589271778804e-02, + 7.35229737139813238622e-02, 7.13779490588904025339e-02, + 6.92451443970067553879e-02, 6.71246538277884968737e-02, + 6.50165779712428976156e-02, 6.29210244377581412456e-02, + 6.08381083495398780614e-02, 5.87679529209337372930e-02, + 5.67106901062029017391e-02, 5.46664613248889208474e-02, + 5.26354182767921896513e-02, 5.06177238609477817000e-02, + 4.86135532158685421122e-02, 4.66230949019303814174e-02, + 4.46465522512944634759e-02, 4.26841449164744590750e-02, + 4.07361106559409394401e-02, 3.88027074045261474722e-02, + 3.68842156885673053135e-02, 3.49809414617161251737e-02, + 3.30932194585785779961e-02, 3.12214171919203004046e-02, + 2.93659397581333588001e-02, 2.75272356696031131329e-02, + 2.57058040085489103443e-02, 2.39022033057958785407e-02, + 2.21170627073088502113e-02, 2.03510962300445102935e-02, + 1.86051212757246224594e-02, 1.68800831525431419000e-02, + 1.51770883079353092332e-02, 1.34974506017398673818e-02, + 1.18427578579078790488e-02, 1.02149714397014590439e-02, + 8.61658276939872638800e-03, 7.05087547137322242369e-03, + 5.52240329925099155545e-03, 4.03797259336302356153e-03, + 2.60907274610215926189e-03, 1.26028593049859797236e-03}; + +static const uint32_t ki_float[] = { + 0x007799ECUL, 0x00000000UL, 0x006045F5UL, 0x006D1AA8UL, 0x00728FB4UL, + 0x007592AFUL, 0x00777A5CUL, 0x0078CA38UL, 0x0079BF6BUL, 0x007A7A35UL, + 0x007B0D2FUL, 0x007B83D4UL, 0x007BE597UL, 0x007C3788UL, 0x007C7D33UL, + 0x007CB926UL, 0x007CED48UL, 0x007D1B08UL, 0x007D437FUL, 0x007D678BUL, + 0x007D87DBUL, 0x007DA4FCUL, 0x007DBF61UL, 0x007DD767UL, 0x007DED5DUL, + 0x007E0183UL, 0x007E1411UL, 0x007E2534UL, 0x007E3515UL, 0x007E43D5UL, + 0x007E5193UL, 0x007E5E67UL, 0x007E6A69UL, 0x007E75AAUL, 0x007E803EUL, + 0x007E8A32UL, 0x007E9395UL, 0x007E9C72UL, 0x007EA4D5UL, 0x007EACC6UL, + 0x007EB44EUL, 0x007EBB75UL, 0x007EC243UL, 0x007EC8BCUL, 0x007ECEE8UL, + 0x007ED4CCUL, 0x007EDA6BUL, 0x007EDFCBUL, 0x007EE4EFUL, 0x007EE9DCUL, + 0x007EEE94UL, 0x007EF31BUL, 0x007EF774UL, 0x007EFBA0UL, 0x007EFFA3UL, + 0x007F037FUL, 0x007F0736UL, 0x007F0ACAUL, 0x007F0E3CUL, 0x007F118FUL, + 0x007F14C4UL, 0x007F17DCUL, 0x007F1ADAUL, 0x007F1DBDUL, 0x007F2087UL, + 0x007F233AUL, 0x007F25D7UL, 0x007F285DUL, 0x007F2AD0UL, 0x007F2D2EUL, + 0x007F2F7AUL, 0x007F31B3UL, 0x007F33DCUL, 0x007F35F3UL, 0x007F37FBUL, + 0x007F39F3UL, 0x007F3BDCUL, 0x007F3DB7UL, 0x007F3F84UL, 0x007F4145UL, + 0x007F42F8UL, 0x007F449FUL, 0x007F463AUL, 0x007F47CAUL, 0x007F494EUL, + 0x007F4AC8UL, 0x007F4C38UL, 0x007F4D9DUL, 0x007F4EF9UL, 0x007F504CUL, + 0x007F5195UL, 0x007F52D5UL, 0x007F540DUL, 0x007F553DUL, 0x007F5664UL, + 0x007F5784UL, 0x007F589CUL, 0x007F59ACUL, 0x007F5AB5UL, 0x007F5BB8UL, + 0x007F5CB3UL, 0x007F5DA8UL, 0x007F5E96UL, 0x007F5F7EUL, 0x007F605FUL, + 0x007F613BUL, 0x007F6210UL, 0x007F62E0UL, 0x007F63AAUL, 0x007F646FUL, + 0x007F652EUL, 0x007F65E8UL, 0x007F669CUL, 0x007F674CUL, 0x007F67F6UL, + 0x007F689CUL, 0x007F693CUL, 0x007F69D9UL, 0x007F6A70UL, 0x007F6B03UL, + 0x007F6B91UL, 0x007F6C1BUL, 0x007F6CA0UL, 0x007F6D21UL, 0x007F6D9EUL, + 0x007F6E17UL, 0x007F6E8CUL, 0x007F6EFCUL, 0x007F6F68UL, 0x007F6FD1UL, + 0x007F7035UL, 0x007F7096UL, 0x007F70F3UL, 0x007F714CUL, 0x007F71A1UL, + 0x007F71F2UL, 0x007F723FUL, 0x007F7289UL, 0x007F72CFUL, 0x007F7312UL, + 0x007F7350UL, 0x007F738BUL, 0x007F73C3UL, 0x007F73F6UL, 0x007F7427UL, + 0x007F7453UL, 0x007F747CUL, 0x007F74A1UL, 0x007F74C3UL, 0x007F74E0UL, + 0x007F74FBUL, 0x007F7511UL, 0x007F7524UL, 0x007F7533UL, 0x007F753FUL, + 0x007F7546UL, 0x007F754AUL, 0x007F754BUL, 0x007F7547UL, 0x007F753FUL, + 0x007F7534UL, 0x007F7524UL, 0x007F7511UL, 0x007F74F9UL, 0x007F74DEUL, + 0x007F74BEUL, 0x007F749AUL, 0x007F7472UL, 0x007F7445UL, 0x007F7414UL, + 0x007F73DFUL, 0x007F73A5UL, 0x007F7366UL, 0x007F7323UL, 0x007F72DAUL, + 0x007F728DUL, 0x007F723AUL, 0x007F71E3UL, 0x007F7186UL, 0x007F7123UL, + 0x007F70BBUL, 0x007F704DUL, 0x007F6FD9UL, 0x007F6F5FUL, 0x007F6EDFUL, + 0x007F6E58UL, 0x007F6DCBUL, 0x007F6D37UL, 0x007F6C9CUL, 0x007F6BF9UL, + 0x007F6B4FUL, 0x007F6A9CUL, 0x007F69E2UL, 0x007F691FUL, 0x007F6854UL, + 0x007F677FUL, 0x007F66A1UL, 0x007F65B8UL, 0x007F64C6UL, 0x007F63C8UL, + 0x007F62C0UL, 0x007F61ABUL, 0x007F608AUL, 0x007F5F5DUL, 0x007F5E21UL, + 0x007F5CD8UL, 0x007F5B7FUL, 0x007F5A17UL, 0x007F589EUL, 0x007F5713UL, + 0x007F5575UL, 0x007F53C4UL, 0x007F51FEUL, 0x007F5022UL, 0x007F4E2FUL, + 0x007F4C22UL, 0x007F49FAUL, 0x007F47B6UL, 0x007F4553UL, 0x007F42CFUL, + 0x007F4028UL, 0x007F3D5AUL, 0x007F3A64UL, 0x007F3741UL, 0x007F33EDUL, + 0x007F3065UL, 0x007F2CA4UL, 0x007F28A4UL, 0x007F245FUL, 0x007F1FCEUL, + 0x007F1AEAUL, 0x007F15A9UL, 0x007F1000UL, 0x007F09E4UL, 0x007F0346UL, + 0x007EFC16UL, 0x007EF43EUL, 0x007EEBA8UL, 0x007EE237UL, 0x007ED7C8UL, + 0x007ECC2FUL, 0x007EBF37UL, 0x007EB09DUL, 0x007EA00AUL, 0x007E8D0DUL, + 0x007E7710UL, 0x007E5D47UL, 0x007E3E93UL, 0x007E1959UL, 0x007DEB2CUL, + 0x007DB036UL, 0x007D6203UL, 0x007CF4B9UL, 0x007C4FD2UL, 0x007B3630UL, + 0x0078D2D2UL}; + +static const float wi_float[] = { + 4.66198677960027669255e-07f, 2.56588335019207033255e-08f, + 3.41146697750176784592e-08f, 4.00230311410932959821e-08f, + 4.47179475877737745459e-08f, 4.86837785973537366722e-08f, + 5.21562578925932412861e-08f, 5.52695199001886257153e-08f, + 5.81078488992733116465e-08f, 6.07279932024587421409e-08f, + 6.31701613261172047795e-08f, 6.54639842900233842742e-08f, + 6.76319905583641815324e-08f, 6.96917493470166688656e-08f, + 7.16572544283857476692e-08f, 7.35398519048393832969e-08f, + 7.53488822443557479279e-08f, 7.70921367281667127885e-08f, + 7.87761895947956022626e-08f, 8.04066446825615346857e-08f, + 8.19883218760237408659e-08f, 8.35254002936857088917e-08f, + 8.50215298165053411740e-08f, 8.64799190652369040985e-08f, + 8.79034055989140110861e-08f, 8.92945125124233511541e-08f, + 9.06554945027956262312e-08f, 9.19883756905278607229e-08f, + 9.32949809202232869780e-08f, 9.45769618559625849039e-08f, + 9.58358188855612866442e-08f, 9.70729196232813152662e-08f, + 9.82895146313061088986e-08f, 9.94867508514382224721e-08f, + 1.00665683139461669691e-07f, 1.01827284217853923044e-07f, + 1.02972453302539369464e-07f, 1.04102023612124921572e-07f, + 1.05216768930574060431e-07f, 1.06317409364335657741e-07f, + 1.07404616410877866490e-07f, 1.08479017436113134283e-07f, + 1.09541199642370962438e-07f, 1.10591713595628691212e-07f, + 1.11631076370069356306e-07f, 1.12659774359245895023e-07f, + 1.13678265795837113569e-07f, 1.14686983015899673063e-07f, + 1.15686334498432158725e-07f, 1.16676706706789039179e-07f, + 1.17658465754873988919e-07f, 1.18631958917986203582e-07f, + 1.19597516005596215528e-07f, 1.20555450611113917226e-07f, + 1.21506061251817163689e-07f, 1.22449632410483948386e-07f, + 1.23386435488872536840e-07f, 1.24316729681986364321e-07f, + 1.25240762781015530062e-07f, 1.26158771911939892267e-07f, + 1.27070984215989333455e-07f, 1.27977617477468922011e-07f, + 1.28878880703854958297e-07f, 1.29774974662539874521e-07f, + 1.30666092378141980504e-07f, 1.31552419593887221722e-07f, + 1.32434135200211397569e-07f, 1.33311411633413359243e-07f, + 1.34184415246907777059e-07f, 1.35053306657377859830e-07f, + 1.35918241067904315860e-07f, 1.36779368569952053923e-07f, + 1.37636834425917531047e-07f, 1.38490779333783508675e-07f, + 1.39341339675287344817e-07f, 1.40188647748881762555e-07f, + 1.41032831988654882776e-07f, 1.41874017170273235693e-07f, + 1.42712324604921442006e-07f, 1.43547872322127921816e-07f, + 1.44380775242292721080e-07f, 1.45211145339665544509e-07f, + 1.46039091796461362146e-07f, 1.46864721148745476208e-07f, + 1.47688137424670065700e-07f, 1.48509442275598857119e-07f, + 1.49328735100614641423e-07f, 1.50146113164867617390e-07f, + 1.50961671712187416111e-07f, 1.51775504072350982845e-07f, + 1.52587701763369746341e-07f, 1.53398354589133671168e-07f, + 1.54207550732725568797e-07f, 1.55015376845697999657e-07f, + 1.55821918133584372604e-07f, 1.56627258437898192833e-07f, + 1.57431480314857468671e-07f, 1.58234665111056041043e-07f, + 1.59036893036289199880e-07f, 1.59838243233728855017e-07f, + 1.60638793847630850137e-07f, 1.61438622088746393909e-07f, + 1.62237804297600106296e-07f, 1.63036416005787357730e-07f, + 1.63834531995435479082e-07f, 1.64632226356965902954e-07f, + 1.65429572545287097020e-07f, 1.66226643434541294491e-07f, + 1.67023511371523209274e-07f, 1.67820248227882200051e-07f, + 1.68616925451215588827e-07f, 1.69413614115155757272e-07f, + 1.70210384968549673733e-07f, 1.71007308483826142122e-07f, + 1.71804454904642543391e-07f, 1.72601894292900061024e-07f, + 1.73399696575213681990e-07f, 1.74197931588920988271e-07f, + 1.74996669127712165834e-07f, 1.75795978986961275677e-07f, + 1.76595931008838063924e-07f, 1.77396595127278238022e-07f, + 1.78198041412889183130e-07f, 1.79000340117867431104e-07f, + 1.79803561721004406185e-07f, 1.80607776972855859813e-07f, + 1.81413056941151359868e-07f, 1.82219473056520464354e-07f, + 1.83027097158612474240e-07f, 1.83836001542687613069e-07f, + 1.84646259006759307383e-07f, 1.85457942899367347876e-07f, + 1.86271127168064649331e-07f, 1.87085886408701333260e-07f, + 1.87902295915592424729e-07f, 1.88720431732658022414e-07f, + 1.89540370705627262627e-07f, 1.90362190535400839128e-07f, + 1.91185969832669990437e-07f, 1.92011788173893651535e-07f, + 1.92839726158739913768e-07f, 1.93669865469102145482e-07f, + 1.94502288929804890433e-07f, 1.95337080571120616772e-07f, + 1.96174325693223683314e-07f, 1.97014110932714374919e-07f, + 1.97856524331352952716e-07f, 1.98701655407150388211e-07f, + 1.99549595227971635348e-07f, 2.00400436487814600236e-07f, + 2.01254273585938820883e-07f, 2.02111202709026498408e-07f, + 2.02971321916571014951e-07f, 2.03834731229698846698e-07f, + 2.04701532723644121196e-07f, 2.05571830624108885378e-07f, + 2.06445731407757185541e-07f, 2.07323343907107312957e-07f, + 2.08204779420104330037e-07f, 2.09090151824673600213e-07f, + 2.09979577698577670508e-07f, 2.10873176444920111011e-07f, + 2.11771070423665379388e-07f, 2.12673385089569268965e-07f, + 2.13580249136944118603e-07f, 2.14491794651713402832e-07f, + 2.15408157271244625533e-07f, 2.16329476352486921685e-07f, + 2.17255895148978920488e-07f, 2.18187560997337924713e-07f, + 2.19124625513888206785e-07f, 2.20067244802139479285e-07f, + 2.21015579671883851683e-07f, 2.21969795870742159701e-07f, + 2.22930064329060010376e-07f, 2.23896561419128954210e-07f, + 2.24869469229791575583e-07f, 2.25848975857580322189e-07f, + 2.26835275715640744118e-07f, 2.27828569861799901001e-07f, + 2.28829066347263833069e-07f, 2.29836980587561823183e-07f, + 2.30852535757505260518e-07f, 2.31875963212094114516e-07f, + 2.32907502935486642699e-07f, 2.33947404020352726160e-07f, + 2.34995925180156140289e-07f, 2.36053335297164516378e-07f, + 2.37119914009265667728e-07f, 2.38195952338983970691e-07f, + 2.39281753368440712742e-07f, 2.40377632964396957621e-07f, + 2.41483920557958384709e-07f, 2.42600959984018662258e-07f, + 2.43729110386077326413e-07f, 2.44868747192698939290e-07f, + 2.46020263172594533433e-07f, 2.47184069576113545901e-07f, + 2.48360597371852893654e-07f, 2.49550298588131851232e-07f, + 2.50753647770270890721e-07f, 2.51971143565970967140e-07f, + 2.53203310452642767375e-07f, 2.54450700622322097890e-07f, + 2.55713896041856770961e-07f, 2.56993510708419870887e-07f, + 2.58290193123138874550e-07f, 2.59604629008804833146e-07f, + 2.60937544301314385690e-07f, 2.62289708448800566945e-07f, + 2.63661938057441759882e-07f, 2.65055100928844238758e-07f, + 2.66470120540847889467e-07f, 2.67907981031821866252e-07f, + 2.69369732758258246335e-07f, 2.70856498507068313229e-07f, + 2.72369480457841388042e-07f, 2.73909968006952220135e-07f, + 2.75479346585437289399e-07f, 2.77079107626811561009e-07f, + 2.78710859870496796972e-07f, 2.80376342222588603820e-07f, + 2.82077438439999912690e-07f, 2.83816193958769527230e-07f, + 2.85594835255375795814e-07f, 2.87415792215003905739e-07f, + 2.89281724087851835900e-07f, 2.91195549750371467233e-07f, + 2.93160483161771875581e-07f, 2.95180075129332912389e-07f, + 2.97258262785797916083e-07f, 2.99399428561531794298e-07f, + 3.01608470935804138388e-07f, 3.03890889921758510417e-07f, + 3.06252891144972267537e-07f, 3.08701513613258141075e-07f, + 3.11244787989714509378e-07f, 3.13891934589336184321e-07f, + 3.16653613755314681314e-07f, 3.19542246256559459667e-07f, + 3.22572428717978242099e-07f, 3.25761480217458181578e-07f, + 3.29130173358915628534e-07f, 3.32703730345002116955e-07f, + 3.36513208964639108346e-07f, 3.40597478255417943913e-07f, + 3.45006114675213401550e-07f, 3.49803789521323211592e-07f, + 3.55077180848341416206e-07f, 3.60946392031859609868e-07f, + 3.67584959507244041831e-07f, 3.75257645787954431030e-07f, + 3.84399301057791926300e-07f, 3.95804015855768440983e-07f, + 4.11186015434435801956e-07f, 4.35608969373823260746e-07f}; + +static const float fi_float[] = { + 1.00000000000000000000e+00f, 9.77101701267671596263e-01f, + 9.59879091800106665211e-01f, 9.45198953442299649730e-01f, + 9.32060075959230460718e-01f, 9.19991505039347012840e-01f, + 9.08726440052130879366e-01f, 8.98095921898343418910e-01f, + 8.87984660755833377088e-01f, 8.78309655808917399966e-01f, + 8.69008688036857046555e-01f, 8.60033621196331532488e-01f, + 8.51346258458677951353e-01f, 8.42915653112204177333e-01f, + 8.34716292986883434679e-01f, 8.26726833946221373317e-01f, + 8.18929191603702366642e-01f, 8.11307874312656274185e-01f, + 8.03849483170964274059e-01f, 7.96542330422958966274e-01f, + 7.89376143566024590648e-01f, 7.82341832654802504798e-01f, + 7.75431304981187174974e-01f, 7.68637315798486264740e-01f, + 7.61953346836795386565e-01f, 7.55373506507096115214e-01f, + 7.48892447219156820459e-01f, 7.42505296340151055290e-01f, + 7.36207598126862650112e-01f, 7.29995264561476231435e-01f, + 7.23864533468630222401e-01f, 7.17811932630721960535e-01f, + 7.11834248878248421200e-01f, 7.05928501332754310127e-01f, + 7.00091918136511615067e-01f, 6.94321916126116711609e-01f, + 6.88616083004671808432e-01f, 6.82972161644994857355e-01f, + 6.77388036218773526009e-01f, 6.71861719897082099173e-01f, + 6.66391343908750100056e-01f, 6.60975147776663107813e-01f, + 6.55611470579697264149e-01f, 6.50298743110816701574e-01f, + 6.45035480820822293424e-01f, 6.39820277453056585060e-01f, + 6.34651799287623608059e-01f, 6.29528779924836690007e-01f, + 6.24450015547026504592e-01f, 6.19414360605834324325e-01f, + 6.14420723888913888899e-01f, 6.09468064925773433949e-01f, + 6.04555390697467776029e-01f, 5.99681752619125263415e-01f, + 5.94846243767987448159e-01f, 5.90047996332826008015e-01f, + 5.85286179263371453274e-01f, 5.80559996100790898232e-01f, + 5.75868682972353718164e-01f, 5.71211506735253227163e-01f, + 5.66587763256164445025e-01f, 5.61996775814524340831e-01f, + 5.57437893618765945014e-01f, 5.52910490425832290562e-01f, + 5.48413963255265812791e-01f, 5.43947731190026262382e-01f, + 5.39511234256952132426e-01f, 5.35103932380457614215e-01f, + 5.30725304403662057062e-01f, 5.26374847171684479008e-01f, + 5.22052074672321841931e-01f, 5.17756517229756352272e-01f, + 5.13487720747326958914e-01f, 5.09245245995747941592e-01f, + 5.05028667943468123624e-01f, 5.00837575126148681903e-01f, + 4.96671569052489714213e-01f, 4.92530263643868537748e-01f, + 4.88413284705458028423e-01f, 4.84320269426683325253e-01f, + 4.80250865909046753544e-01f, 4.76204732719505863248e-01f, + 4.72181538467730199660e-01f, 4.68180961405693596422e-01f, + 4.64202689048174355069e-01f, 4.60246417812842867345e-01f, + 4.56311852678716434184e-01f, 4.52398706861848520777e-01f, + 4.48506701507203064949e-01f, 4.44635565395739396077e-01f, + 4.40785034665803987508e-01f, 4.36954852547985550526e-01f, + 4.33144769112652261445e-01f, 4.29354541029441427735e-01f, + 4.25583931338021970170e-01f, 4.21832709229495894654e-01f, + 4.18100649837848226120e-01f, 4.14387534040891125642e-01f, + 4.10693148270188157500e-01f, 4.07017284329473372217e-01f, + 4.03359739221114510510e-01f, 3.99720314980197222177e-01f, + 3.96098818515832451492e-01f, 3.92495061459315619512e-01f, + 3.88908860018788715696e-01f, 3.85340034840077283462e-01f, + 3.81788410873393657674e-01f, 3.78253817245619183840e-01f, + 3.74736087137891138443e-01f, 3.71235057668239498696e-01f, + 3.67750569779032587814e-01f, 3.64282468129004055601e-01f, + 3.60830600989648031529e-01f, 3.57394820145780500731e-01f, + 3.53974980800076777232e-01f, 3.50570941481406106455e-01f, + 3.47182563956793643900e-01f, 3.43809713146850715049e-01f, + 3.40452257044521866547e-01f, 3.37110066637006045021e-01f, + 3.33783015830718454708e-01f, 3.30470981379163586400e-01f, + 3.27173842813601400970e-01f, 3.23891482376391093290e-01f, + 3.20623784956905355514e-01f, 3.17370638029913609834e-01f, + 3.14131931596337177215e-01f, 3.10907558126286509559e-01f, + 3.07697412504292056035e-01f, 3.04501391976649993243e-01f, + 3.01319396100803049698e-01f, 2.98151326696685481377e-01f, + 2.94997087799961810184e-01f, 2.91856585617095209972e-01f, + 2.88729728482182923521e-01f, 2.85616426815501756042e-01f, + 2.82516593083707578948e-01f, 2.79430141761637940157e-01f, + 2.76356989295668320494e-01f, 2.73297054068577072172e-01f, + 2.70250256365875463072e-01f, 2.67216518343561471038e-01f, + 2.64195763997261190426e-01f, 2.61187919132721213522e-01f, + 2.58192911337619235290e-01f, 2.55210669954661961700e-01f, + 2.52241126055942177508e-01f, 2.49284212418528522415e-01f, + 2.46339863501263828249e-01f, 2.43408015422750312329e-01f, + 2.40488605940500588254e-01f, 2.37581574431238090606e-01f, + 2.34686861872330010392e-01f, 2.31804410824338724684e-01f, + 2.28934165414680340644e-01f, 2.26076071322380278694e-01f, + 2.23230075763917484855e-01f, 2.20396127480151998723e-01f, + 2.17574176724331130872e-01f, 2.14764175251173583536e-01f, + 2.11966076307030182324e-01f, 2.09179834621125076977e-01f, + 2.06405406397880797353e-01f, 2.03642749310334908452e-01f, + 2.00891822494656591136e-01f, 1.98152586545775138971e-01f, + 1.95425003514134304483e-01f, 1.92709036903589175926e-01f, + 1.90004651670464985713e-01f, 1.87311814223800304768e-01f, + 1.84630492426799269756e-01f, 1.81960655599522513892e-01f, + 1.79302274522847582272e-01f, 1.76655321443734858455e-01f, + 1.74019770081838553999e-01f, 1.71395595637505754327e-01f, + 1.68782774801211288285e-01f, 1.66181285764481906364e-01f, + 1.63591108232365584074e-01f, 1.61012223437511009516e-01f, + 1.58444614155924284882e-01f, 1.55888264724479197465e-01f, + 1.53343161060262855866e-01f, 1.50809290681845675763e-01f, + 1.48286642732574552861e-01f, 1.45775208005994028060e-01f, + 1.43274978973513461566e-01f, 1.40785949814444699690e-01f, + 1.38308116448550733057e-01f, 1.35841476571253755301e-01f, + 1.33386029691669155683e-01f, 1.30941777173644358090e-01f, + 1.28508722279999570981e-01f, 1.26086870220185887081e-01f, + 1.23676228201596571932e-01f, 1.21276805484790306533e-01f, + 1.18888613442910059947e-01f, 1.16511665625610869035e-01f, + 1.14145977827838487895e-01f, 1.11791568163838089811e-01f, + 1.09448457146811797824e-01f, 1.07116667774683801961e-01f, + 1.04796225622487068629e-01f, 1.02487158941935246892e-01f, + 1.00189498768810017482e-01f, 9.79032790388624646338e-02f, + 9.56285367130089991594e-02f, 9.33653119126910124859e-02f, + 9.11136480663737591268e-02f, 8.88735920682758862021e-02f, + 8.66451944505580717859e-02f, 8.44285095703534715916e-02f, + 8.22235958132029043366e-02f, 8.00305158146630696292e-02f, + 7.78493367020961224423e-02f, 7.56801303589271778804e-02f, + 7.35229737139813238622e-02f, 7.13779490588904025339e-02f, + 6.92451443970067553879e-02f, 6.71246538277884968737e-02f, + 6.50165779712428976156e-02f, 6.29210244377581412456e-02f, + 6.08381083495398780614e-02f, 5.87679529209337372930e-02f, + 5.67106901062029017391e-02f, 5.46664613248889208474e-02f, + 5.26354182767921896513e-02f, 5.06177238609477817000e-02f, + 4.86135532158685421122e-02f, 4.66230949019303814174e-02f, + 4.46465522512944634759e-02f, 4.26841449164744590750e-02f, + 4.07361106559409394401e-02f, 3.88027074045261474722e-02f, + 3.68842156885673053135e-02f, 3.49809414617161251737e-02f, + 3.30932194585785779961e-02f, 3.12214171919203004046e-02f, + 2.93659397581333588001e-02f, 2.75272356696031131329e-02f, + 2.57058040085489103443e-02f, 2.39022033057958785407e-02f, + 2.21170627073088502113e-02f, 2.03510962300445102935e-02f, + 1.86051212757246224594e-02f, 1.68800831525431419000e-02f, + 1.51770883079353092332e-02f, 1.34974506017398673818e-02f, + 1.18427578579078790488e-02f, 1.02149714397014590439e-02f, + 8.61658276939872638800e-03f, 7.05087547137322242369e-03f, + 5.52240329925099155545e-03f, 4.03797259336302356153e-03f, + 2.60907274610215926189e-03f, 1.26028593049859797236e-03f}; + +static const uint64_t ke_double[] = { + 0x001C5214272497C6, 0x0000000000000000, 0x00137D5BD79C317E, + 0x00186EF58E3F3C10, 0x001A9BB7320EB0AE, 0x001BD127F719447C, + 0x001C951D0F88651A, 0x001D1BFE2D5C3972, 0x001D7E5BD56B18B2, + 0x001DC934DD172C70, 0x001E0409DFAC9DC8, 0x001E337B71D47836, + 0x001E5A8B177CB7A2, 0x001E7B42096F046C, 0x001E970DAF08AE3E, + 0x001EAEF5B14EF09E, 0x001EC3BD07B46556, 0x001ED5F6F08799CE, + 0x001EE614AE6E5688, 0x001EF46ECA361CD0, 0x001F014B76DDD4A4, + 0x001F0CE313A796B6, 0x001F176369F1F77A, 0x001F20F20C452570, + 0x001F29AE1951A874, 0x001F31B18FB95532, 0x001F39125157C106, + 0x001F3FE2EB6E694C, 0x001F463332D788FA, 0x001F4C10BF1D3A0E, + 0x001F51874C5C3322, 0x001F56A109C3ECC0, 0x001F5B66D9099996, + 0x001F5FE08210D08C, 0x001F6414DD445772, 0x001F6809F6859678, + 0x001F6BC52A2B02E6, 0x001F6F4B3D32E4F4, 0x001F72A07190F13A, + 0x001F75C8974D09D6, 0x001F78C71B045CC0, 0x001F7B9F12413FF4, + 0x001F7E5346079F8A, 0x001F80E63BE21138, 0x001F835A3DAD9162, + 0x001F85B16056B912, 0x001F87ED89B24262, 0x001F8A10759374FA, + 0x001F8C1BBA3D39AC, 0x001F8E10CC45D04A, 0x001F8FF102013E16, + 0x001F91BD968358E0, 0x001F9377AC47AFD8, 0x001F95204F8B64DA, + 0x001F96B878633892, 0x001F98410C968892, 0x001F99BAE146BA80, + 0x001F9B26BC697F00, 0x001F9C85561B717A, 0x001F9DD759CFD802, + 0x001F9F1D6761A1CE, 0x001FA058140936C0, 0x001FA187EB3A3338, + 0x001FA2AD6F6BC4FC, 0x001FA3C91ACE0682, 0x001FA4DB5FEE6AA2, + 0x001FA5E4AA4D097C, 0x001FA6E55EE46782, 0x001FA7DDDCA51EC4, + 0x001FA8CE7CE6A874, 0x001FA9B793CE5FEE, 0x001FAA9970ADB858, + 0x001FAB745E588232, 0x001FAC48A3740584, 0x001FAD1682BF9FE8, + 0x001FADDE3B5782C0, 0x001FAEA008F21D6C, 0x001FAF5C2418B07E, + 0x001FB012C25B7A12, 0x001FB0C41681DFF4, 0x001FB17050B6F1FA, + 0x001FB2179EB2963A, 0x001FB2BA2BDFA84A, 0x001FB358217F4E18, + 0x001FB3F1A6C9BE0C, 0x001FB486E10CACD6, 0x001FB517F3C793FC, + 0x001FB5A500C5FDAA, 0x001FB62E2837FE58, 0x001FB6B388C9010A, + 0x001FB7353FB50798, 0x001FB7B368DC7DA8, 0x001FB82E1ED6BA08, + 0x001FB8A57B0347F6, 0x001FB919959A0F74, 0x001FB98A85BA7204, + 0x001FB9F861796F26, 0x001FBA633DEEE286, 0x001FBACB2F41EC16, + 0x001FBB3048B49144, 0x001FBB929CAEA4E2, 0x001FBBF23CC8029E, + 0x001FBC4F39D22994, 0x001FBCA9A3E140D4, 0x001FBD018A548F9E, + 0x001FBD56FBDE729C, 0x001FBDAA068BD66A, 0x001FBDFAB7CB3F40, + 0x001FBE491C7364DE, 0x001FBE9540C9695E, 0x001FBEDF3086B128, + 0x001FBF26F6DE6174, 0x001FBF6C9E828AE2, 0x001FBFB031A904C4, + 0x001FBFF1BA0FFDB0, 0x001FC03141024588, 0x001FC06ECF5B54B2, + 0x001FC0AA6D8B1426, 0x001FC0E42399698A, 0x001FC11BF9298A64, + 0x001FC151F57D1942, 0x001FC1861F770F4A, 0x001FC1B87D9E74B4, + 0x001FC1E91620EA42, 0x001FC217EED505DE, 0x001FC2450D3C83FE, + 0x001FC27076864FC2, 0x001FC29A2F90630E, 0x001FC2C23CE98046, + 0x001FC2E8A2D2C6B4, 0x001FC30D654122EC, 0x001FC33087DE9C0E, + 0x001FC3520E0B7EC6, 0x001FC371FADF66F8, 0x001FC390512A2886, + 0x001FC3AD137497FA, 0x001FC3C844013348, 0x001FC3E1E4CCAB40, + 0x001FC3F9F78E4DA8, 0x001FC4107DB85060, 0x001FC4257877FD68, + 0x001FC438E8B5BFC6, 0x001FC44ACF15112A, 0x001FC45B2BF447E8, + 0x001FC469FF6C4504, 0x001FC477495001B2, 0x001FC483092BFBB8, + 0x001FC48D3E457FF6, 0x001FC495E799D21A, 0x001FC49D03DD30B0, + 0x001FC4A29179B432, 0x001FC4A68E8E07FC, 0x001FC4A8F8EBFB8C, + 0x001FC4A9CE16EA9E, 0x001FC4A90B41FA34, 0x001FC4A6AD4E28A0, + 0x001FC4A2B0C82E74, 0x001FC49D11E62DE2, 0x001FC495CC852DF4, + 0x001FC48CDC265EC0, 0x001FC4823BEC237A, 0x001FC475E696DEE6, + 0x001FC467D6817E82, 0x001FC458059DC036, 0x001FC4466D702E20, + 0x001FC433070BCB98, 0x001FC41DCB0D6E0E, 0x001FC406B196BBF6, + 0x001FC3EDB248CB62, 0x001FC3D2C43E593C, 0x001FC3B5DE0591B4, + 0x001FC396F599614C, 0x001FC376005A4592, 0x001FC352F3069370, + 0x001FC32DC1B22818, 0x001FC3065FBD7888, 0x001FC2DCBFCBF262, + 0x001FC2B0D3B99F9E, 0x001FC2828C8FFCF0, 0x001FC251DA79F164, + 0x001FC21EACB6D39E, 0x001FC1E8F18C6756, 0x001FC1B09637BB3C, + 0x001FC17586DCCD10, 0x001FC137AE74D6B6, 0x001FC0F6F6BB2414, + 0x001FC0B348184DA4, 0x001FC06C898BAFF0, 0x001FC022A092F364, + 0x001FBFD5710F72B8, 0x001FBF84DD29488E, 0x001FBF30C52FC60A, + 0x001FBED907770CC6, 0x001FBE7D80327DDA, 0x001FBE1E094BA614, + 0x001FBDBA7A354408, 0x001FBD52A7B9F826, 0x001FBCE663C6201A, + 0x001FBC757D2C4DE4, 0x001FBBFFBF63B7AA, 0x001FBB84F23FE6A2, + 0x001FBB04D9A0D18C, 0x001FBA7F351A70AC, 0x001FB9F3BF92B618, + 0x001FB9622ED4ABFC, 0x001FB8CA33174A16, 0x001FB82B76765B54, + 0x001FB7859C5B895C, 0x001FB6D840D55594, 0x001FB622F7D96942, + 0x001FB5654C6F37E0, 0x001FB49EBFBF69D2, 0x001FB3CEC803E746, + 0x001FB2F4CF539C3E, 0x001FB21032442852, 0x001FB1203E5A9604, + 0x001FB0243042E1C2, 0x001FAF1B31C479A6, 0x001FAE045767E104, + 0x001FACDE9DBF2D72, 0x001FABA8E640060A, 0x001FAA61F399FF28, + 0x001FA908656F66A2, 0x001FA79AB3508D3C, 0x001FA61726D1F214, + 0x001FA47BD48BEA00, 0x001FA2C693C5C094, 0x001FA0F4F47DF314, + 0x001F9F04336BBE0A, 0x001F9CF12B79F9BC, 0x001F9AB84415ABC4, + 0x001F98555B782FB8, 0x001F95C3ABD03F78, 0x001F92FDA9CEF1F2, + 0x001F8FFCDA9AE41C, 0x001F8CB99E7385F8, 0x001F892AEC479606, + 0x001F8545F904DB8E, 0x001F80FDC336039A, 0x001F7C427839E926, + 0x001F7700A3582ACC, 0x001F71200F1A241C, 0x001F6A8234B7352A, + 0x001F630000A8E266, 0x001F5A66904FE3C4, 0x001F50724ECE1172, + 0x001F44C7665C6FDA, 0x001F36E5A38A59A2, 0x001F26143450340A, + 0x001F113E047B0414, 0x001EF6AEFA57CBE6, 0x001ED38CA188151E, + 0x001EA2A61E122DB0, 0x001E5961C78B267C, 0x001DDDF62BAC0BB0, + 0x001CDB4DD9E4E8C0}; + +static const double we_double[] = { + 9.655740063209182975e-16, 7.089014243955414331e-18, + 1.163941249669122378e-17, 1.524391512353216015e-17, + 1.833284885723743916e-17, 2.108965109464486630e-17, + 2.361128077843138196e-17, 2.595595772310893952e-17, + 2.816173554197752338e-17, 3.025504130321382330e-17, + 3.225508254836375280e-17, 3.417632340185027033e-17, + 3.602996978734452488e-17, 3.782490776869649048e-17, + 3.956832198097553231e-17, 4.126611778175946428e-17, + 4.292321808442525631e-17, 4.454377743282371417e-17, + 4.613133981483185932e-17, 4.768895725264635940e-17, + 4.921928043727962847e-17, 5.072462904503147014e-17, + 5.220704702792671737e-17, 5.366834661718192181e-17, + 5.511014372835094717e-17, 5.653388673239667134e-17, + 5.794088004852766616e-17, 5.933230365208943081e-17, + 6.070922932847179572e-17, 6.207263431163193485e-17, + 6.342341280303076511e-17, 6.476238575956142121e-17, + 6.609030925769405241e-17, 6.740788167872722244e-17, + 6.871574991183812442e-17, 7.001451473403929616e-17, + 7.130473549660643409e-17, 7.258693422414648352e-17, + 7.386159921381791997e-17, 7.512918820723728089e-17, + 7.639013119550825792e-17, 7.764483290797848102e-17, + 7.889367502729790548e-17, 8.013701816675454434e-17, + 8.137520364041762206e-17, 8.260855505210038174e-17, + 8.383737972539139383e-17, 8.506196999385323132e-17, + 8.628260436784112996e-17, 8.749954859216182511e-17, + 8.871305660690252281e-17, 8.992337142215357066e-17, + 9.113072591597909173e-17, 9.233534356381788123e-17, + 9.353743910649128938e-17, 9.473721916312949566e-17, + 9.593488279457997317e-17, 9.713062202221521206e-17, + 9.832462230649511362e-17, 9.951706298915071878e-17, + 1.007081177024294931e-16, 1.018979547484694078e-16, + 1.030867374515421954e-16, 1.042746244856188556e-16, + 1.054617701794576406e-16, 1.066483248011914702e-16, + 1.078344348241948498e-16, 1.090202431758350473e-16, + 1.102058894705578110e-16, 1.113915102286197502e-16, + 1.125772390816567488e-16, 1.137632069661684705e-16, + 1.149495423059009298e-16, 1.161363711840218308e-16, + 1.173238175059045788e-16, 1.185120031532669434e-16, + 1.197010481303465158e-16, 1.208910707027385520e-16, + 1.220821875294706151e-16, 1.232745137888415193e-16, + 1.244681632985112523e-16, 1.256632486302898513e-16, + 1.268598812200397542e-16, 1.280581714730749379e-16, + 1.292582288654119552e-16, 1.304601620412028847e-16, + 1.316640789066572582e-16, 1.328700867207380889e-16, + 1.340782921828999433e-16, 1.352888015181175458e-16, + 1.365017205594397770e-16, 1.377171548282880964e-16, + 1.389352096127063919e-16, 1.401559900437571538e-16, + 1.413796011702485188e-16, 1.426061480319665444e-16, + 1.438357357315790180e-16, 1.450684695053687684e-16, + 1.463044547929475721e-16, 1.475437973060951633e-16, + 1.487866030968626066e-16, 1.500329786250736949e-16, + 1.512830308253539427e-16, 1.525368671738125550e-16, + 1.537945957544996933e-16, 1.550563253257577148e-16, + 1.563221653865837505e-16, 1.575922262431176140e-16, + 1.588666190753684151e-16, 1.601454560042916733e-16, + 1.614288501593278662e-16, 1.627169157465130500e-16, + 1.640097681172717950e-16, 1.653075238380036909e-16, + 1.666103007605742067e-16, 1.679182180938228863e-16, + 1.692313964762022267e-16, 1.705499580496629830e-16, + 1.718740265349031656e-16, 1.732037273081008369e-16, + 1.745391874792533975e-16, 1.758805359722491379e-16, + 1.772279036068006489e-16, 1.785814231823732619e-16, + 1.799412295642463721e-16, 1.813074597718501559e-16, + 1.826802530695252266e-16, 1.840597510598587828e-16, + 1.854460977797569461e-16, 1.868394397994192684e-16, + 1.882399263243892051e-16, 1.896477093008616722e-16, + 1.910629435244376536e-16, 1.924857867525243818e-16, + 1.939163998205899420e-16, 1.953549467624909132e-16, + 1.968015949351037382e-16, 1.982565151475019047e-16, + 1.997198817949342081e-16, 2.011918729978734671e-16, + 2.026726707464198289e-16, 2.041624610503588774e-16, + 2.056614340951917875e-16, 2.071697844044737034e-16, + 2.086877110088159721e-16, 2.102154176219292789e-16, + 2.117531128241075913e-16, 2.133010102535779087e-16, + 2.148593288061663316e-16, 2.164282928437604723e-16, + 2.180081324120784027e-16, 2.195990834682870728e-16, + 2.212013881190495942e-16, 2.228152948696180545e-16, + 2.244410588846308588e-16, 2.260789422613173739e-16, + 2.277292143158621037e-16, 2.293921518837311354e-16, + 2.310680396348213318e-16, 2.327571704043534613e-16, + 2.344598455404957859e-16, 2.361763752697773994e-16, + 2.379070790814276700e-16, 2.396522861318623520e-16, + 2.414123356706293277e-16, 2.431875774892255956e-16, + 2.449783723943070217e-16, 2.467850927069288738e-16, + 2.486081227895851719e-16, 2.504478596029557040e-16, + 2.523047132944217013e-16, 2.541791078205812227e-16, + 2.560714816061770759e-16, 2.579822882420530896e-16, + 2.599119972249746917e-16, 2.618610947423924219e-16, + 2.638300845054942823e-16, 2.658194886341845120e-16, + 2.678298485979525166e-16, 2.698617262169488933e-16, + 2.719157047279818500e-16, 2.739923899205814823e-16, + 2.760924113487617126e-16, 2.782164236246436081e-16, + 2.803651078006983464e-16, 2.825391728480253184e-16, + 2.847393572388174091e-16, 2.869664306419817679e-16, + 2.892211957417995598e-16, 2.915044901905293183e-16, + 2.938171887070028633e-16, 2.961602053345465687e-16, + 2.985344958730045276e-16, 3.009410605012618141e-16, + 3.033809466085003416e-16, 3.058552518544860874e-16, + 3.083651274815310004e-16, 3.109117819034266344e-16, + 3.134964845996663118e-16, 3.161205703467105734e-16, + 3.187854438219713117e-16, 3.214925846206797361e-16, + 3.242435527309451638e-16, 3.270399945182240440e-16, + 3.298836492772283149e-16, 3.327763564171671408e-16, + 3.357200633553244075e-16, 3.387168342045505162e-16, + 3.417688593525636996e-16, 3.448784660453423890e-16, + 3.480481301037442286e-16, 3.512804889222979418e-16, + 3.545783559224791863e-16, 3.579447366604276541e-16, + 3.613828468219060593e-16, 3.648961323764542545e-16, + 3.684882922095621322e-16, 3.721633036080207290e-16, + 3.759254510416256532e-16, 3.797793587668874387e-16, + 3.837300278789213687e-16, 3.877828785607895292e-16, + 3.919437984311428867e-16, 3.962191980786774996e-16, + 4.006160751056541688e-16, 4.051420882956573177e-16, + 4.098056438903062509e-16, 4.146159964290904582e-16, + 4.195833672073398926e-16, 4.247190841824385048e-16, + 4.300357481667470702e-16, 4.355474314693952008e-16, + 4.412699169036069903e-16, 4.472209874259932285e-16, + 4.534207798565834480e-16, 4.598922204905932469e-16, + 4.666615664711475780e-16, 4.737590853262492027e-16, + 4.812199172829237933e-16, 4.890851827392209900e-16, + 4.974034236191939753e-16, 5.062325072144159699e-16, + 5.156421828878082953e-16, 5.257175802022274839e-16, + 5.365640977112021618e-16, 5.483144034258703912e-16, + 5.611387454675159622e-16, 5.752606481503331688e-16, + 5.909817641652102998e-16, 6.087231416180907671e-16, + 6.290979034877557049e-16, 6.530492053564040799e-16, + 6.821393079028928626e-16, 7.192444966089361564e-16, + 7.706095350032096755e-16, 8.545517038584027421e-16}; + +static const double fe_double[] = { + 1.000000000000000000e+00, 9.381436808621747003e-01, + 9.004699299257464817e-01, 8.717043323812035949e-01, + 8.477855006239896074e-01, 8.269932966430503241e-01, + 8.084216515230083777e-01, 7.915276369724956185e-01, + 7.759568520401155522e-01, 7.614633888498962833e-01, + 7.478686219851951034e-01, 7.350380924314234843e-01, + 7.228676595935720206e-01, 7.112747608050760117e-01, + 7.001926550827881623e-01, 6.895664961170779872e-01, + 6.793505722647653622e-01, 6.695063167319247333e-01, + 6.600008410789997004e-01, 6.508058334145710999e-01, + 6.418967164272660897e-01, 6.332519942143660652e-01, + 6.248527387036659775e-01, 6.166821809152076561e-01, + 6.087253820796220127e-01, 6.009689663652322267e-01, + 5.934009016917334289e-01, 5.860103184772680329e-01, + 5.787873586028450257e-01, 5.717230486648258170e-01, + 5.648091929124001709e-01, 5.580382822625874484e-01, + 5.514034165406412891e-01, 5.448982376724396115e-01, + 5.385168720028619127e-01, 5.322538802630433219e-01, + 5.261042139836197284e-01, 5.200631773682335979e-01, + 5.141263938147485613e-01, 5.082897764106428795e-01, + 5.025495018413477233e-01, 4.969019872415495476e-01, + 4.913438695940325340e-01, 4.858719873418849144e-01, + 4.804833639304542103e-01, 4.751751930373773747e-01, + 4.699448252839599771e-01, 4.647897562504261781e-01, + 4.597076156421376902e-01, 4.546961574746155033e-01, + 4.497532511627549967e-01, 4.448768734145485126e-01, + 4.400651008423538957e-01, 4.353161032156365740e-01, + 4.306281372884588343e-01, 4.259995411430343437e-01, + 4.214287289976165751e-01, 4.169141864330028757e-01, + 4.124544659971611793e-01, 4.080481831520323954e-01, + 4.036940125305302773e-01, 3.993906844752310725e-01, + 3.951369818332901573e-01, 3.909317369847971069e-01, + 3.867738290841376547e-01, 3.826621814960098344e-01, + 3.785957594095807899e-01, 3.745735676159021588e-01, + 3.705946484351460013e-01, 3.666580797815141568e-01, + 3.627629733548177748e-01, 3.589084729487497794e-01, + 3.550937528667874599e-01, 3.513180164374833381e-01, + 3.475804946216369817e-01, 3.438804447045024082e-01, + 3.402171490667800224e-01, 3.365899140286776059e-01, + 3.329980687618089852e-01, 3.294409642641363267e-01, + 3.259179723935561879e-01, 3.224284849560891675e-01, + 3.189719128449572394e-01, 3.155476852271289490e-01, + 3.121552487741795501e-01, 3.087940669345601852e-01, + 3.054636192445902565e-01, 3.021634006756935276e-01, + 2.988929210155817917e-01, 2.956517042812611962e-01, + 2.924392881618925744e-01, 2.892552234896777485e-01, + 2.860990737370768255e-01, 2.829704145387807457e-01, + 2.798688332369729248e-01, 2.767939284485173568e-01, + 2.737453096528029706e-01, 2.707225967990600224e-01, + 2.677254199320447947e-01, 2.647534188350622042e-01, + 2.618062426893629779e-01, 2.588835497490162285e-01, + 2.559850070304153791e-01, 2.531102900156294577e-01, + 2.502590823688622956e-01, 2.474310756653276266e-01, + 2.446259691318921070e-01, 2.418434693988772144e-01, + 2.390832902624491774e-01, 2.363451524570596429e-01, + 2.336287834374333461e-01, 2.309339171696274118e-01, + 2.282602939307167011e-01, 2.256076601166840667e-01, + 2.229757680581201940e-01, 2.203643758433594946e-01, + 2.177732471487005272e-01, 2.152021510753786837e-01, + 2.126508619929782795e-01, 2.101191593889882581e-01, + 2.076068277242220372e-01, 2.051136562938377095e-01, + 2.026394390937090173e-01, 2.001839746919112650e-01, + 1.977470661050988732e-01, 1.953285206795632167e-01, + 1.929281499767713515e-01, 1.905457696631953912e-01, + 1.881811994042543179e-01, 1.858342627621971110e-01, + 1.835047870977674633e-01, 1.811926034754962889e-01, + 1.788975465724783054e-01, 1.766194545904948843e-01, + 1.743581691713534942e-01, 1.721135353153200598e-01, + 1.698854013025276610e-01, 1.676736186172501919e-01, + 1.654780418749360049e-01, 1.632985287519018169e-01, + 1.611349399175920349e-01, 1.589871389693142123e-01, + 1.568549923693652315e-01, 1.547383693844680830e-01, + 1.526371420274428570e-01, 1.505511850010398944e-01, + 1.484803756438667910e-01, 1.464245938783449441e-01, + 1.443837221606347754e-01, 1.423576454324722018e-01, + 1.403462510748624548e-01, 1.383494288635802039e-01, + 1.363670709264288572e-01, 1.343990717022136294e-01, + 1.324453279013875218e-01, 1.305057384683307731e-01, + 1.285802045452281717e-01, 1.266686294375106714e-01, + 1.247709185808309612e-01, 1.228869795095451356e-01, + 1.210167218266748335e-01, 1.191600571753276827e-01, + 1.173168992115555670e-01, 1.154871635786335338e-01, + 1.136707678827443141e-01, 1.118676316700562973e-01, + 1.100776764051853845e-01, 1.083008254510337970e-01, + 1.065370040500016602e-01, 1.047861393065701724e-01, + 1.030481601712577161e-01, 1.013229974259536315e-01, + 9.961058367063713170e-02, 9.791085331149219917e-02, + 9.622374255043279756e-02, 9.454918937605585882e-02, + 9.288713355604354127e-02, 9.123751663104015530e-02, + 8.960028191003285847e-02, 8.797537446727021759e-02, + 8.636274114075691288e-02, 8.476233053236811865e-02, + 8.317409300963238272e-02, 8.159798070923741931e-02, + 8.003394754231990538e-02, 7.848194920160642130e-02, + 7.694194317048050347e-02, 7.541388873405840965e-02, + 7.389774699236474620e-02, 7.239348087570873780e-02, + 7.090105516237182881e-02, 6.942043649872875477e-02, + 6.795159342193660135e-02, 6.649449638533977414e-02, + 6.504911778675374900e-02, 6.361543199980733421e-02, + 6.219341540854099459e-02, 6.078304644547963265e-02, + 5.938430563342026597e-02, 5.799717563120065922e-02, + 5.662164128374287675e-02, 5.525768967669703741e-02, + 5.390531019604608703e-02, 5.256449459307169225e-02, + 5.123523705512628146e-02, 4.991753428270637172e-02, + 4.861138557337949667e-02, 4.731679291318154762e-02, + 4.603376107617516977e-02, 4.476229773294328196e-02, + 4.350241356888818328e-02, 4.225412241331623353e-02, + 4.101744138041481941e-02, 3.979239102337412542e-02, + 3.857899550307485742e-02, 3.737728277295936097e-02, + 3.618728478193142251e-02, 3.500903769739741045e-02, + 3.384258215087432992e-02, 3.268796350895953468e-02, + 3.154523217289360859e-02, 3.041444391046660423e-02, + 2.929566022463739317e-02, 2.818894876397863569e-02, + 2.709438378095579969e-02, 2.601204664513421735e-02, + 2.494202641973178314e-02, 2.388442051155817078e-02, + 2.283933540638524023e-02, 2.180688750428358066e-02, + 2.078720407257811723e-02, 1.978042433800974303e-02, + 1.878670074469603046e-02, 1.780620041091136169e-02, + 1.683910682603994777e-02, 1.588562183997316302e-02, + 1.494596801169114850e-02, 1.402039140318193759e-02, + 1.310916493125499106e-02, 1.221259242625538123e-02, + 1.133101359783459695e-02, 1.046481018102997894e-02, + 9.614413642502209895e-03, 8.780314985808975251e-03, + 7.963077438017040002e-03, 7.163353183634983863e-03, + 6.381905937319179087e-03, 5.619642207205483020e-03, + 4.877655983542392333e-03, 4.157295120833795314e-03, + 3.460264777836904049e-03, 2.788798793574076128e-03, + 2.145967743718906265e-03, 1.536299780301572356e-03, + 9.672692823271745359e-04, 4.541343538414967652e-04}; + +static const uint32_t ke_float[] = { + 0x00714851UL, 0x00000000UL, 0x004DF56FUL, 0x0061BBD6UL, 0x006A6EDDUL, + 0x006F44A0UL, 0x00725474UL, 0x00746FF9UL, 0x0075F96FUL, 0x007724D3UL, + 0x00781027UL, 0x0078CDEEUL, 0x00796A2CUL, 0x0079ED08UL, 0x007A5C37UL, + 0x007ABBD7UL, 0x007B0EF4UL, 0x007B57DCUL, 0x007B9853UL, 0x007BD1BBUL, + 0x007C052EUL, 0x007C338CUL, 0x007C5D8EUL, 0x007C83C8UL, 0x007CA6B8UL, + 0x007CC6C6UL, 0x007CE449UL, 0x007CFF8CUL, 0x007D18CDUL, 0x007D3043UL, + 0x007D461DUL, 0x007D5A84UL, 0x007D6D9BUL, 0x007D7F82UL, 0x007D9053UL, + 0x007DA028UL, 0x007DAF15UL, 0x007DBD2DUL, 0x007DCA82UL, 0x007DD722UL, + 0x007DE31CUL, 0x007DEE7CUL, 0x007DF94DUL, 0x007E0399UL, 0x007E0D69UL, + 0x007E16C6UL, 0x007E1FB6UL, 0x007E2842UL, 0x007E306FUL, 0x007E3843UL, + 0x007E3FC4UL, 0x007E46F6UL, 0x007E4DDFUL, 0x007E5481UL, 0x007E5AE2UL, + 0x007E6104UL, 0x007E66ECUL, 0x007E6C9BUL, 0x007E7215UL, 0x007E775DUL, + 0x007E7C76UL, 0x007E8160UL, 0x007E8620UL, 0x007E8AB6UL, 0x007E8F24UL, + 0x007E936DUL, 0x007E9793UL, 0x007E9B95UL, 0x007E9F77UL, 0x007EA33AUL, + 0x007EA6DEUL, 0x007EAA66UL, 0x007EADD1UL, 0x007EB123UL, 0x007EB45AUL, + 0x007EB779UL, 0x007EBA80UL, 0x007EBD71UL, 0x007EC04BUL, 0x007EC310UL, + 0x007EC5C1UL, 0x007EC85EUL, 0x007ECAE9UL, 0x007ECD61UL, 0x007ECFC7UL, + 0x007ED21CUL, 0x007ED460UL, 0x007ED694UL, 0x007ED8B9UL, 0x007EDACEUL, + 0x007EDCD5UL, 0x007EDECEUL, 0x007EE0B8UL, 0x007EE296UL, 0x007EE466UL, + 0x007EE62AUL, 0x007EE7E2UL, 0x007EE98DUL, 0x007EEB2DUL, 0x007EECC1UL, + 0x007EEE4AUL, 0x007EEFC9UL, 0x007EF13DUL, 0x007EF2A7UL, 0x007EF406UL, + 0x007EF55CUL, 0x007EF6A8UL, 0x007EF7EBUL, 0x007EF924UL, 0x007EFA55UL, + 0x007EFB7DUL, 0x007EFC9CUL, 0x007EFDB2UL, 0x007EFEC1UL, 0x007EFFC7UL, + 0x007F00C5UL, 0x007F01BBUL, 0x007F02AAUL, 0x007F0391UL, 0x007F0470UL, + 0x007F0548UL, 0x007F0618UL, 0x007F06E2UL, 0x007F07A4UL, 0x007F0860UL, + 0x007F0914UL, 0x007F09C2UL, 0x007F0A69UL, 0x007F0B09UL, 0x007F0BA3UL, + 0x007F0C36UL, 0x007F0CC2UL, 0x007F0D48UL, 0x007F0DC8UL, 0x007F0E41UL, + 0x007F0EB4UL, 0x007F0F21UL, 0x007F0F88UL, 0x007F0FE8UL, 0x007F1042UL, + 0x007F1096UL, 0x007F10E4UL, 0x007F112BUL, 0x007F116DUL, 0x007F11A8UL, + 0x007F11DDUL, 0x007F120CUL, 0x007F1235UL, 0x007F1258UL, 0x007F1274UL, + 0x007F128AUL, 0x007F129AUL, 0x007F12A4UL, 0x007F12A7UL, 0x007F12A4UL, + 0x007F129BUL, 0x007F128BUL, 0x007F1274UL, 0x007F1257UL, 0x007F1233UL, + 0x007F1209UL, 0x007F11D8UL, 0x007F119FUL, 0x007F1160UL, 0x007F111AUL, + 0x007F10CCUL, 0x007F1077UL, 0x007F101BUL, 0x007F0FB7UL, 0x007F0F4BUL, + 0x007F0ED7UL, 0x007F0E5CUL, 0x007F0DD8UL, 0x007F0D4CUL, 0x007F0CB7UL, + 0x007F0C19UL, 0x007F0B73UL, 0x007F0AC3UL, 0x007F0A0AUL, 0x007F0947UL, + 0x007F087BUL, 0x007F07A4UL, 0x007F06C2UL, 0x007F05D6UL, 0x007F04DFUL, + 0x007F03DCUL, 0x007F02CDUL, 0x007F01B2UL, 0x007F008BUL, 0x007EFF56UL, + 0x007EFE13UL, 0x007EFCC3UL, 0x007EFB64UL, 0x007EF9F6UL, 0x007EF878UL, + 0x007EF6EAUL, 0x007EF54BUL, 0x007EF39AUL, 0x007EF1D6UL, 0x007EEFFFUL, + 0x007EEE14UL, 0x007EEC13UL, 0x007EE9FDUL, 0x007EE7CFUL, 0x007EE589UL, + 0x007EE329UL, 0x007EE0AEUL, 0x007EDE16UL, 0x007EDB61UL, 0x007ED88CUL, + 0x007ED595UL, 0x007ED27BUL, 0x007ECF3BUL, 0x007ECBD3UL, 0x007EC841UL, + 0x007EC481UL, 0x007EC091UL, 0x007EBC6DUL, 0x007EB811UL, 0x007EB37AUL, + 0x007EAEA4UL, 0x007EA988UL, 0x007EA422UL, 0x007E9E6BUL, 0x007E985DUL, + 0x007E91EFUL, 0x007E8B1AUL, 0x007E83D4UL, 0x007E7C11UL, 0x007E73C5UL, + 0x007E6AE1UL, 0x007E6155UL, 0x007E570FUL, 0x007E4BF7UL, 0x007E3FF3UL, + 0x007E32E6UL, 0x007E24ACUL, 0x007E1518UL, 0x007E03F7UL, 0x007DF10AUL, + 0x007DDC03UL, 0x007DC480UL, 0x007DAA09UL, 0x007D8C00UL, 0x007D699AUL, + 0x007D41C9UL, 0x007D131EUL, 0x007CDB97UL, 0x007C9851UL, 0x007C44F8UL, + 0x007BDABCUL, 0x007B4E33UL, 0x007A8A98UL, 0x00796587UL, 0x007777D9UL, + 0x00736D37UL, +}; +static const float we_float[] = { + 1.03677719e-06F, 7.61177108e-09F, 1.24977240e-08F, 1.63680292e-08F, + 1.96847466e-08F, 2.26448404e-08F, 2.53524197e-08F, 2.78699974e-08F, + 3.02384333e-08F, 3.24861032e-08F, 3.46336312e-08F, 3.66965478e-08F, + 3.86868855e-08F, 4.06141855e-08F, 4.24861622e-08F, 4.43091566e-08F, + 4.60884545e-08F, 4.78285168e-08F, 4.95331490e-08F, 5.12056279e-08F, + 5.28488000e-08F, 5.44651557e-08F, 5.60568899e-08F, 5.76259484e-08F, + 5.91740662e-08F, 6.07027987e-08F, 6.22135462e-08F, 6.37075759e-08F, + 6.51860386e-08F, 6.66499836e-08F, 6.81003709e-08F, 6.95380822e-08F, + 7.09639292e-08F, 7.23786618e-08F, 7.37829746e-08F, 7.51775128e-08F, + 7.65628768e-08F, 7.79396272e-08F, 7.93082883e-08F, 8.06693516e-08F, + 8.20232788e-08F, 8.33705045e-08F, 8.47114385e-08F, 8.60464681e-08F, + 8.73759596e-08F, 8.87002606e-08F, 9.00197010e-08F, 9.13345948e-08F, + 9.26452410e-08F, 9.39519249e-08F, 9.52549192e-08F, 9.65544849e-08F, + 9.78508719e-08F, 9.91443202e-08F, 1.00435060e-07F, 1.01723315e-07F, + 1.03009296e-07F, 1.04293211e-07F, 1.05575259e-07F, 1.06855633e-07F, + 1.08134518e-07F, 1.09412096e-07F, 1.10688542e-07F, 1.11964025e-07F, + 1.13238713e-07F, 1.14512767e-07F, 1.15786343e-07F, 1.17059595e-07F, + 1.18332673e-07F, 1.19605723e-07F, 1.20878890e-07F, 1.22152313e-07F, + 1.23426131e-07F, 1.24700479e-07F, 1.25975490e-07F, 1.27251294e-07F, + 1.28528022e-07F, 1.29805799e-07F, 1.31084751e-07F, 1.32365001e-07F, + 1.33646673e-07F, 1.34929886e-07F, 1.36214760e-07F, 1.37501415e-07F, + 1.38789966e-07F, 1.40080532e-07F, 1.41373228e-07F, 1.42668169e-07F, + 1.43965470e-07F, 1.45265245e-07F, 1.46567606e-07F, 1.47872669e-07F, + 1.49180545e-07F, 1.50491348e-07F, 1.51805191e-07F, 1.53122186e-07F, + 1.54442445e-07F, 1.55766083e-07F, 1.57093212e-07F, 1.58423946e-07F, + 1.59758399e-07F, 1.61096684e-07F, 1.62438917e-07F, 1.63785214e-07F, + 1.65135690e-07F, 1.66490462e-07F, 1.67849647e-07F, 1.69213364e-07F, + 1.70581733e-07F, 1.71954874e-07F, 1.73332908e-07F, 1.74715958e-07F, + 1.76104148e-07F, 1.77497602e-07F, 1.78896448e-07F, 1.80300814e-07F, + 1.81710828e-07F, 1.83126623e-07F, 1.84548331e-07F, 1.85976086e-07F, + 1.87410026e-07F, 1.88850288e-07F, 1.90297012e-07F, 1.91750343e-07F, + 1.93210424e-07F, 1.94677403e-07F, 1.96151428e-07F, 1.97632653e-07F, + 1.99121231e-07F, 2.00617321e-07F, 2.02121082e-07F, 2.03632677e-07F, + 2.05152273e-07F, 2.06680040e-07F, 2.08216149e-07F, 2.09760777e-07F, + 2.11314104e-07F, 2.12876312e-07F, 2.14447590e-07F, 2.16028129e-07F, + 2.17618123e-07F, 2.19217773e-07F, 2.20827283e-07F, 2.22446862e-07F, + 2.24076723e-07F, 2.25717086e-07F, 2.27368174e-07F, 2.29030216e-07F, + 2.30703448e-07F, 2.32388110e-07F, 2.34084450e-07F, 2.35792720e-07F, + 2.37513182e-07F, 2.39246101e-07F, 2.40991752e-07F, 2.42750416e-07F, + 2.44522382e-07F, 2.46307948e-07F, 2.48107418e-07F, 2.49921109e-07F, + 2.51749342e-07F, 2.53592452e-07F, 2.55450781e-07F, 2.57324683e-07F, + 2.59214522e-07F, 2.61120673e-07F, 2.63043524e-07F, 2.64983476e-07F, + 2.66940939e-07F, 2.68916342e-07F, 2.70910123e-07F, 2.72922739e-07F, + 2.74954660e-07F, 2.77006373e-07F, 2.79078382e-07F, 2.81171210e-07F, + 2.83285396e-07F, 2.85421503e-07F, 2.87580110e-07F, 2.89761822e-07F, + 2.91967265e-07F, 2.94197089e-07F, 2.96451969e-07F, 2.98732610e-07F, + 3.01039742e-07F, 3.03374127e-07F, 3.05736557e-07F, 3.08127859e-07F, + 3.10548894e-07F, 3.13000563e-07F, 3.15483804e-07F, 3.17999599e-07F, + 3.20548974e-07F, 3.23133003e-07F, 3.25752811e-07F, 3.28409576e-07F, + 3.31104534e-07F, 3.33838984e-07F, 3.36614287e-07F, 3.39431878e-07F, + 3.42293264e-07F, 3.45200034e-07F, 3.48153864e-07F, 3.51156520e-07F, + 3.54209871e-07F, 3.57315892e-07F, 3.60476673e-07F, 3.63694431e-07F, + 3.66971518e-07F, 3.70310433e-07F, 3.73713834e-07F, 3.77184553e-07F, + 3.80725611e-07F, 3.84340234e-07F, 3.88031877e-07F, 3.91804239e-07F, + 3.95661291e-07F, 3.99607304e-07F, 4.03646879e-07F, 4.07784981e-07F, + 4.12026980e-07F, 4.16378695e-07F, 4.20846449e-07F, 4.25437124e-07F, + 4.30158235e-07F, 4.35018005e-07F, 4.40025460e-07F, 4.45190536e-07F, + 4.50524210e-07F, 4.56038644e-07F, 4.61747369e-07F, 4.67665494e-07F, + 4.73809965e-07F, 4.80199879e-07F, 4.86856855e-07F, 4.93805512e-07F, + 5.01074042e-07F, 5.08694944e-07F, 5.16705952e-07F, 5.25151216e-07F, + 5.34082859e-07F, 5.43563016e-07F, 5.53666578e-07F, 5.64484953e-07F, + 5.76131313e-07F, 5.88748108e-07F, 6.02518140e-07F, 6.17681418e-07F, + 6.34561837e-07F, 6.53611496e-07F, 6.75488730e-07F, 7.01206245e-07F, + 7.32441505e-07F, 7.72282898e-07F, 8.27435688e-07F, 9.17567905e-07F, +}; +static const float fe_float[] = { + 1.00000000e+00F, 9.38143681e-01F, 9.00469930e-01F, 8.71704332e-01F, + 8.47785501e-01F, 8.26993297e-01F, 8.08421652e-01F, 7.91527637e-01F, + 7.75956852e-01F, 7.61463389e-01F, 7.47868622e-01F, 7.35038092e-01F, + 7.22867660e-01F, 7.11274761e-01F, 7.00192655e-01F, 6.89566496e-01F, + 6.79350572e-01F, 6.69506317e-01F, 6.60000841e-01F, 6.50805833e-01F, + 6.41896716e-01F, 6.33251994e-01F, 6.24852739e-01F, 6.16682181e-01F, + 6.08725382e-01F, 6.00968966e-01F, 5.93400902e-01F, 5.86010318e-01F, + 5.78787359e-01F, 5.71723049e-01F, 5.64809193e-01F, 5.58038282e-01F, + 5.51403417e-01F, 5.44898238e-01F, 5.38516872e-01F, 5.32253880e-01F, + 5.26104214e-01F, 5.20063177e-01F, 5.14126394e-01F, 5.08289776e-01F, + 5.02549502e-01F, 4.96901987e-01F, 4.91343870e-01F, 4.85871987e-01F, + 4.80483364e-01F, 4.75175193e-01F, 4.69944825e-01F, 4.64789756e-01F, + 4.59707616e-01F, 4.54696157e-01F, 4.49753251e-01F, 4.44876873e-01F, + 4.40065101e-01F, 4.35316103e-01F, 4.30628137e-01F, 4.25999541e-01F, + 4.21428729e-01F, 4.16914186e-01F, 4.12454466e-01F, 4.08048183e-01F, + 4.03694013e-01F, 3.99390684e-01F, 3.95136982e-01F, 3.90931737e-01F, + 3.86773829e-01F, 3.82662181e-01F, 3.78595759e-01F, 3.74573568e-01F, + 3.70594648e-01F, 3.66658080e-01F, 3.62762973e-01F, 3.58908473e-01F, + 3.55093753e-01F, 3.51318016e-01F, 3.47580495e-01F, 3.43880445e-01F, + 3.40217149e-01F, 3.36589914e-01F, 3.32998069e-01F, 3.29440964e-01F, + 3.25917972e-01F, 3.22428485e-01F, 3.18971913e-01F, 3.15547685e-01F, + 3.12155249e-01F, 3.08794067e-01F, 3.05463619e-01F, 3.02163401e-01F, + 2.98892921e-01F, 2.95651704e-01F, 2.92439288e-01F, 2.89255223e-01F, + 2.86099074e-01F, 2.82970415e-01F, 2.79868833e-01F, 2.76793928e-01F, + 2.73745310e-01F, 2.70722597e-01F, 2.67725420e-01F, 2.64753419e-01F, + 2.61806243e-01F, 2.58883550e-01F, 2.55985007e-01F, 2.53110290e-01F, + 2.50259082e-01F, 2.47431076e-01F, 2.44625969e-01F, 2.41843469e-01F, + 2.39083290e-01F, 2.36345152e-01F, 2.33628783e-01F, 2.30933917e-01F, + 2.28260294e-01F, 2.25607660e-01F, 2.22975768e-01F, 2.20364376e-01F, + 2.17773247e-01F, 2.15202151e-01F, 2.12650862e-01F, 2.10119159e-01F, + 2.07606828e-01F, 2.05113656e-01F, 2.02639439e-01F, 2.00183975e-01F, + 1.97747066e-01F, 1.95328521e-01F, 1.92928150e-01F, 1.90545770e-01F, + 1.88181199e-01F, 1.85834263e-01F, 1.83504787e-01F, 1.81192603e-01F, + 1.78897547e-01F, 1.76619455e-01F, 1.74358169e-01F, 1.72113535e-01F, + 1.69885401e-01F, 1.67673619e-01F, 1.65478042e-01F, 1.63298529e-01F, + 1.61134940e-01F, 1.58987139e-01F, 1.56854992e-01F, 1.54738369e-01F, + 1.52637142e-01F, 1.50551185e-01F, 1.48480376e-01F, 1.46424594e-01F, + 1.44383722e-01F, 1.42357645e-01F, 1.40346251e-01F, 1.38349429e-01F, + 1.36367071e-01F, 1.34399072e-01F, 1.32445328e-01F, 1.30505738e-01F, + 1.28580205e-01F, 1.26668629e-01F, 1.24770919e-01F, 1.22886980e-01F, + 1.21016722e-01F, 1.19160057e-01F, 1.17316899e-01F, 1.15487164e-01F, + 1.13670768e-01F, 1.11867632e-01F, 1.10077676e-01F, 1.08300825e-01F, + 1.06537004e-01F, 1.04786139e-01F, 1.03048160e-01F, 1.01322997e-01F, + 9.96105837e-02F, 9.79108533e-02F, 9.62237426e-02F, 9.45491894e-02F, + 9.28871336e-02F, 9.12375166e-02F, 8.96002819e-02F, 8.79753745e-02F, + 8.63627411e-02F, 8.47623305e-02F, 8.31740930e-02F, 8.15979807e-02F, + 8.00339475e-02F, 7.84819492e-02F, 7.69419432e-02F, 7.54138887e-02F, + 7.38977470e-02F, 7.23934809e-02F, 7.09010552e-02F, 6.94204365e-02F, + 6.79515934e-02F, 6.64944964e-02F, 6.50491178e-02F, 6.36154320e-02F, + 6.21934154e-02F, 6.07830464e-02F, 5.93843056e-02F, 5.79971756e-02F, + 5.66216413e-02F, 5.52576897e-02F, 5.39053102e-02F, 5.25644946e-02F, + 5.12352371e-02F, 4.99175343e-02F, 4.86113856e-02F, 4.73167929e-02F, + 4.60337611e-02F, 4.47622977e-02F, 4.35024136e-02F, 4.22541224e-02F, + 4.10174414e-02F, 3.97923910e-02F, 3.85789955e-02F, 3.73772828e-02F, + 3.61872848e-02F, 3.50090377e-02F, 3.38425822e-02F, 3.26879635e-02F, + 3.15452322e-02F, 3.04144439e-02F, 2.92956602e-02F, 2.81889488e-02F, + 2.70943838e-02F, 2.60120466e-02F, 2.49420264e-02F, 2.38844205e-02F, + 2.28393354e-02F, 2.18068875e-02F, 2.07872041e-02F, 1.97804243e-02F, + 1.87867007e-02F, 1.78062004e-02F, 1.68391068e-02F, 1.58856218e-02F, + 1.49459680e-02F, 1.40203914e-02F, 1.31091649e-02F, 1.22125924e-02F, + 1.13310136e-02F, 1.04648102e-02F, 9.61441364e-03F, 8.78031499e-03F, + 7.96307744e-03F, 7.16335318e-03F, 6.38190594e-03F, 5.61964221e-03F, + 4.87765598e-03F, 4.15729512e-03F, 3.46026478e-03F, 2.78879879e-03F, + 2.14596774e-03F, 1.53629978e-03F, 9.67269282e-04F, 4.54134354e-04F, +}; diff --git a/numpy/random/src/dsfmt/128-bit-jump.poly.txt b/numpy/random/src/dsfmt/128-bit-jump.poly.txt new file mode 100644 index 000000000..fea1318fb --- /dev/null +++ b/numpy/random/src/dsfmt/128-bit-jump.poly.txt @@ -0,0 +1,2 @@ +jump polynomial: +f4dfa6c62049d0776e0bf6f1e953f3aa38abb113df86be024eab3773ad5f2b82ead936022e656dff7e562691c59dd5f7d2566b78d9669002503c4ddb1888a49f32333f515e6c60c4ecd221078ec6f26f0a90f4875067ca1f399a99775037adf905566e2c7e6b42131420f8f04f112c92621c9b1502f2a8aefad6c667904af62f0d55e02d396902d3b89450103c5ce5fe0408d97cbb864861b49e4e42048ff3310b48faac55095a7f422eea4aade752f947f947c6be0a0c665bdea099246ab9eff658ea8ca468bf49d0227748367878de06d7bd86ea6708fcac6e252f5f00f04309b2aac3036b64afb39d990427c6c9f03477cc7e935c43c0e61bc161db8eb15516eee8cb377ecbc1849207990fb6778721b29bfe0d89bfda1b3772fa5b0b1f7ec3daf36052032285898c6f6396f55010c31f8201b7e2e51d94f920bfe57684c5415cc342cb39a0045d9793d13cf8646096daeb8bb9bfc20a90de8f2426da8733267a9b9674f32154e8f84a9932223a2ca3c787d0b66df6675febbdfcba2f9cef09c621c57e11098b3289c77397aaae8b104642ffe0c4b75598efbc53745984d68b4d6656cae299ae2be55217a9a02b009ca7be32f47fbe434bce4914a34d0c9b0085bede9b8a99319c34660d66f0124b5a7714c4bf3cbfec3ee43ed817087168bad80133bebaeeb68cf7929a24d1bb3de831a8340d220906ab04159cf94b21d5ee813bd7c80f10f01b43052af530917513b169254c25d6fcfe6cb420d6ce92f54886ef6eaf9a5ba35e893ff593834d05ddf28899e42d729c7df3d21ef036020789739366f0c11ec52ff92a0bfd8ba69508e27b20fabb8217bd36b90e5aa918159ac87913bc7b46c04e366c23c92807fbe9c6a407e6a4db0b4fc23c3b6c706b5ca058fe8c190f849f18d16d6b48b5ed760eb202fd566291a799420b9654e08b8118bcbfead8e9dd2fdb9b053e9bdfb665285c78718f726d0b3d6c37e116428ec9ac9db2637259e4e8d6402bbada46c6bdb03985e19a82e9b4e57de1b025a3cb1f850beae7e8da9941655825bce0e89d536b6ee9064865b1a85c185e9fc9cb7f435de13d44773c00eed442a286e4ab807e3cab4dc3441d1b7d2af693812ae8b39652bb8c835fc895d13d6da93541afeadeee450475c29f3b2dfa8ef1c1e2547463b2cc2f0ff7a42ac4dd35e25c4fa030d2d2766fbe9f2d04c1304671747bace2f7dd55142bfa60f8cbc968bfc3d7a342152dc684a0fb5a32c0962a62b5220ac0f72add9d8b84d6cc76b97d03245e01fc8da3414a49bb4075d3488f29b56dc42ba69e3b58529448c943ecfd98b3784a39d0b8609a8fb945e757f4569f53bd2cf80f7f638acf5b67fe9c560a3b7b0cf7e0398f31aa8b03cf9c62b24296b6d8596b694469a02686c38daa16a1ef86e012d61a2f7de1693a5c00b3685175caec3c67146477eba54830f1d546cb18a553779aa46adb4f2010e33f3def847c7d89b51a8462b227605f6c920fd558a6daf64bc98682e508ae960c0c571870e603ba1fce0c13d53176f353fd319959e13db93eae1359f06e3dd4767c04f824cf34ec7bf8f60161ba1a615db82852eca9e3869afa711ab9a090660b0dc6cfbea310dda77e02310fbaeacd2636f975838c2dbcdbe9ac2cd85cee28f5e3f0c73abf62f9fa02cd79a7606b7ba855db68a07848b057c3aaf38f1a70086e14616f6f88305a1f9ce6b41378a620d4db3e0e7e1d421590dccaeff86212e232eeb5eb8a8d33a8c9b25ae88f3a7bd5032b4efa68f8af3186a02ffcbf5456f12beccace94c81c360cc4a0dcc642b59f991eec68c59af78139ca60b96d6a18e9535f8995e89bd2cf6a0aef3acffd33d1c0c1b79b66414a91d9f65b2b4ec65844b96f725d2b4b0c309f3eb9d714e9dd939bbdfd85ce8fb43679aeab13f6c29549949503c9466dbd337c4cdde46d6eacd15f21f4d8fdeaa627a47884c88a9c85f0b731d271a8ea7cb9e04a4a149c23c10f56b3a0476dc77a999d6e4f813e4b0f805e2a693e2ae4ae0ecc423c9ba5d17b42e691abf83784a582f2b1fd85d1e0a27ba38a500963568b2450363d2c5e3f7b8ba3e5b56e4e9f745a3a710bf2ae233c303068c532ce78ff031e6ab28b705dd94d7db4500909edb5626b8c9bd5ff4f0b4741388f0b91563ee516934c013e901572cba005ac5c535f4f107903be9af7b2793dfb61b5070facbe71eefe1b5600f975c8c38c3a2350d78beadfecb78e981164ae8bc866e732972d3ceef4aac68e15861f9b881d9b51b4edece150bc124b07645defb4202ef5d0e0962db98cae6ed459561c93c74c20bd64362e4f4fffc389a6cd80514604ff22eecc10c9cbc7981d19a8102b24146354c463107c9dc070e29e70df3578022acf72289ef071ab9f9402a544d0399f1b1e5f206b6d46d445f6d612a490e72918e00c853eda8493bef511149e80c9ab56e8b4b8cba3987249f77d060e61760e5792ac321c987c03c2606e9393a7970212992cdbd16448078d5039d4c2c3199714f53278f4f7b1d2e514cf95bdfc078b8bb0db659cb2c3f5cc02890ea84f05d414c88d2db9e9f8455659b9fa6254405317245fa070d6970cafb4dadb2522b490a5c8e02fe973a8cdbfbfbdbfb01535099ffba3d3896bc4d1189fc570c3e6fdc6469265b8da912772e75dd62ab71be507f700d56cac5e68fd6b57ec166168ab5258a69625c142a5b1b3519f94be1bde5e51d3bd8ea0c12d5af2fe4615b1b7bd4a96628a4fabc65925ff09718f63bbebaad98f89bd9543a27b3ff3b5d8bfa89f941a5eb8cc005ccd4a705190e1c9dc6a9f4264e5ee658520a4438e92de854bffc39f8dc7dfbb5de4f14ba63ea16a37d14a7b4610f95b6cffd55e4679b29cedbdf20e7bd16da822fad910c359ee3a68e48aae6e769b0e291d5d3aa3e2ca9d8d23abe8a1d5349f4991e9300852cc0befb20c2fc0d169306b260763344024f8092cbcc24c6807363e9fc548a30d5faab3a94b2af0782a2942be80c45d8b0587efd587394ef33c33022436e285806ddffdd32fe36345c3c38ed8d680abeb7a028b44ee6f94d060a14c7019bb6af1f1b5f0a562957d19826d8cc216f9b908c989ccd5415e3525dfe9422ffb5b50b7cc3083dc325544751e5683535d7439d3da2b0bb73bea551dd99e04e0e793804f4774eb6b1daf781d9caa5128274e599e847862fe309027813d3e4eda0bbeb7201856a5c5d8370e44dabff0bb229c723ba0a6bcf29c44536147de11b7835991018100105bd4329217f7386903fe8e7363cd7b3e893244e245e0a187467664c05b0be1fd429722b9b9a5e3198147fad72776e8a63aab9054fa9d259af0198d088d71d132e6068676a8e9ebb0f616b51ee34aac39c2c2221c71124017270d75ff4a048363c389e04e9b440ad2032a381ac2cfc54f409caa791e65ee4f5d6cd035008f219b88a803a7382ae447bf65a3df2176b25b3b7b67dabe34decd9a1384dc7a003916ca8fbcb29b3ad6fd8eac5bbbaa3bdfa6c6a3ad9427c4f3ed79fea26e14c8ce5fa3b4f82c5f7b6d2125916753a7b92ce9b46d45 diff --git a/numpy/random/src/dsfmt/96-bit-jump.poly.txt b/numpy/random/src/dsfmt/96-bit-jump.poly.txt new file mode 100644 index 000000000..15c68d155 --- /dev/null +++ b/numpy/random/src/dsfmt/96-bit-jump.poly.txt @@ -0,0 +1,2 @@ +jump polynomial: +288da521f7244e5f62bf26692bdd1fcdfd38a850addb02d98bd358367cb78c71348f3e163522e0e30e4feb90aa210dd793c94d151fa89aa319911aa42b511ea568a503d595c6d9bcd37317a37ef5679700a5b67f29df72451770fc1eb8c97427cdd9825c23f32dcd5c4fb117a4f5982a3bee8f16595d021165cd9688db342e360e222c9855c8306fd7b5fc82e62e3b1765e7f3319da9da66c325b030bd6175876efc70636306cd2de31a299ca20e9eb1d5063bcbff0ba282aff4737a5b1585cd940ae9cd45fda222308341e0d5588e81b42c4e0574deeb2d80b84c00cb3f8a7ae6278462e1994b83a25b33aa0dc74d5d3d057dabfd6a8a82d7dfb6bb66a223bc46dca2b5fb1885e6ab80fddcd6578b32c21c4a9d761cb9798800c921d56ee356c491454e15956e68ef566c1706fcdfb6a6828ec1fb93db455525828e8741a371c14959658b99bbd358a162230ee451a8432df279e4ba0d3a493954359a5390b16475540578270b168054fefb1e20948d4d20c88005ed09e671b6a94b8ea929f72e7b2f85af4098a92d742b64964ea6b7773b2c20b22a0ff35bd9367c3160b549411230e15a987f361e04daac49d2fe4c7c9371d84bf270d8f33a62680b2ee014bf5be691aa0d82e66e885eaa832a241aff8a09c435ac2b0698bc3865c5125d498a4ffadd31d5f2c6aee6496fdc6c13055b53e7f65a683ef539b6e8ea6e21429a11ff74ccef09ee83eac1b5ddaf1b77fed786fd20e8cbb3e8877b7f80a24fef31a9f8d8108099c50abc886f7ab20c4396bf51af1b806003185eaf2061f55036e26f498b92aabadfb6b5bed2d116b32ae387a692058e6160a9508dc44c971454d9a357ba115278861be0aeaa0926d773c5d4563e37dffcfed8bbf44b79e384650b7eff30aae73154a2ef130cee1eaf32d944e5472ae217789c03276deb8290c73dd5cde0b6dce9b28cbb73776e3e52260768f37a38db3d1c26c3c065b641c7a825edc155d947118d8b6ff8c8818440088724261ca83fa866aa4159fbffac8c28c8a7ca5f1e2fde74b4908c8215cbde20190bdf0de1d5a05a2c116a66eeadcafd643098e74ec3e82b3658c0c4fd7c7797d567b8db3d6b67ca262d713dbf45cc80b89f279be0991f276a4501d2ea6222026caa7e6fbcf4d02fdf65d8f439f88cfb1121d1b0f4dd670d146b93e099a32869448d837e347229745e5c30f1521b0c477b2062c9c8f631dcd29664eec7f28bdcac2a1ca2deabbbc89b21824ba92a61eeb4c5dd62b20c346134d842bcfc189f0e2244bfb8596c38c88d5bd4447fcd30890a4acf71801a6bcf5d806073b9ca730db53e160a79516333748708dd5e5be37d75e90e64d14ddf5ccc9390ae67cbba39916ce9b3b6b1d19378e4bd36ef0c9e62570394051cc273122e0790e381b20e083beca6e88bc2fa8bde22c89689b4b710c775cd9065158b48bf855fc3a3e80693908046ea1da9c558f024ea5ea69d65f40317fc3c8bab7606bf7edf17fcaeb706756c4de38319a51fc24c80a2baccef40f4354f5147fb91c9b1b91011d146da7eeb426d25de17d7e39ee53735ef28b64d5e6e5444676f679a8e4e91314377c7606531b90dc1cd8cf2d929ed9137fdc0d6b6db62e764ef200a439d742b51425b818231ed799a53768119466cc42296ce36f52507411709cd9d622c1f899291db27104589a5e4509d9818cef983d6a13ce1247184c4551c38bd319aa068cd9c0b96f9e08b4a7bd7852c129d4126676cbcb30ae029b0e2cec3c58c110fecae7967fca0752874e2fcc077084dd4d87b4dee632b621f35cb9f22639ab4de86089c56cabb1aa8a4fedbc5d8673cca12b4479dca5dc6f3048bb654fd59c464b033960177a4d6c2ee38b124d193cd5da3cbc5aa5ebdf7197f5d3f3008f01effb599b6d238044f2ee204bf372685256813278ca7a7d145445f5d5eb5490d99ee89d587fe446e1600d7daf9553b7d709bd796c59757e2f7c26cb71990a51ffc49120f358ef4927729b7e8c2cf9ad35a8017667625a39e2d12c8b0dd5de3d3797d9167ac9302a122e0f066a2552a7278a632b043089a1f5a127ce0bc8e688c545f20bca467fd58d9f9b8234f2be7a28152ab4d39ba8d6c4471d574571aa1a4a3aca16f46ac74e664899cf24502334aec637a9c392ba1618340080bfaed493abaa2f684ffb2bc1da86d804305772e30893f67235d5ce8180ef60490e46f131465d249a39df1aaed30621169c0f7db0b6a6adcab975ec00ca68b2fc06f443cfb8933b326f106670e0e528f22ff96d06b2a6d917258d29a87cf637f37c3661b42462c11c73c1cd0a6774e4e10157b02c0cfd9148ad8a23775653db3dec41fd6d1d5eb54a3e5e144a489a18412f21eb96a72b6751482882d636a1cd0057ea48d1a985472b0c301c4a5b24b0152bdc50a47126a01a1d0665270cdbf2ed852e0f61515bad6e58973329d269bf057ffa52331dde4700b926f635cdf49234f4aaabbabca5386526568fc3a80b1d8a9e1566e21bf889546379289263d978e95de390c4bbdb5a813535b7186f661f5c283400adb3bcf9365d45abb6088b729a98789265f1e8e684e2b2c15b4ce53230abc5e5bf6895827c95842e0849fc7fe56b7c65f075baded0f2e172c8e1088219615b8697ff4a3c6f5f708e498e6c7312680f214a877061511d3b85087545fc26708d039e977a0157b95ceba40497cc2dd1b1b1394eb98f651d701dfbc3583c159da8bae76a25db213211d6191d83c5d7a3d4b2320b8a305b5d90e04d6e3de161fff5dae7e3d4f6c9e1cf55cb5ac1677d9cfd560db2209be2b9566a267aad9cf4b9e03c221ba5b0f02cabb50cef19180ba329691d114da670d7b85e36c0b6b7d81613bd31350dfe225050861e90043ac2334478f52584a1a8809f76d40af36da02549c54da164487f0b7f823cff797db1c90f2f1c431eca97fc8bcdfb44c30cd1643c893d5e33aa56cbc0a0c38f5c8f6bb37483d13b85b659ac51ce05311bba19c97772e81c2315a009e80f591c82445d493dc3b5fb12c52e8c50c6260200b0d77092bf19aabce491b2c511fca2a4ebd99b446e55f453314297723894f0b223868ef80f5964468afa3a5e6aa41f2128e6de893bba2cbde9bea91ba97bda18a01905ce8d2e85e6011cc0550f5ae9121361fdec1ec97a6e6892a68732a69147476d54babaa564b883baad7100eb1092a1aa28a29f67e6b53deab511e53eada87cfe1bb6e3c6a215fd8d729840a5b5ac452cfd8acb9be7818d2c806c3e0cedd0847ddf9a5906bf1a0fa4da44ccea829a1a5350d5db0241a221b97e5cd5ba15e58b402529317c854fbcda86a562d4ee44a34193513647af0a3bc9f90ababc1dbbfd9aba8d3dcc39463473ca6bc0e1dc736ba712eef42dee80e31e7d8abe23f98e91ab875d0553bc24be9cb1d9484812c0b038cb177ad52064328e17f8ca7c8737902d964017e3aaae66161270dac21de42a6f60d10d89c1556916a249a130752bb7c7783b93a59d9f5456745ecc512f497b5a31be2678b9587628cb45dae2f5f6bde7ea4500c1ba961e2 diff --git a/numpy/random/src/dsfmt/LICENSE.md b/numpy/random/src/dsfmt/LICENSE.md new file mode 100644 index 000000000..d59568f6b --- /dev/null +++ b/numpy/random/src/dsfmt/LICENSE.md @@ -0,0 +1,34 @@ +# DSFMT + +Copyright (c) 2007, 2008, 2009 Mutsuo Saito, Makoto Matsumoto +and Hiroshima University. +Copyright (c) 2011, 2002 Mutsuo Saito, Makoto Matsumoto, Hiroshima +University and The University of Tokyo. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. +* Neither the name of the Hiroshima University nor the names of + its contributors may be used to endorse or promote products + derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/dsfmt/calc-jump.cpp b/numpy/random/src/dsfmt/calc-jump.cpp new file mode 100644 index 000000000..495b2797c --- /dev/null +++ b/numpy/random/src/dsfmt/calc-jump.cpp @@ -0,0 +1,81 @@ +/** + * @file calc-jump.cpp + * + * @brief calc jump function. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + * + * Compile: + * g++ calc-jump.cpp -o calc-jump -lntl + * + * Compute polynomial for 2^128 steps: + * ./calc-jump 340282366920938463463374607431768211456 poly.19937.txt + * + */ +#include <iostream> +#include <fstream> +#include <iomanip> +#include <sstream> +#include <string> +#include <inttypes.h> +#include <stdint.h> +#include <time.h> +#include <NTL/GF2X.h> +#include <NTL/vec_GF2.h> +#include <NTL/ZZ.h> +#include "dSFMT-calc-jump.hpp" + +using namespace NTL; +using namespace std; +using namespace dsfmt; + +static void read_file(GF2X& lcmpoly, long line_no, const string& file); + +int main(int argc, char * argv[]) { + if (argc <= 2) { + cout << argv[0] << " jump-step poly-file" << endl; + cout << " jump-step: a number between zero and 2^{DSFMT_MEXP}-1.\n" + << " large decimal number is allowed." << endl; + cout << " poly-file: one of poly.{MEXP}.txt " + << "file" << endl; + return -1; + } + string step_string = argv[1]; + string filename = argv[2]; + long no = 0; + GF2X lcmpoly; + read_file(lcmpoly, no, filename); + ZZ step; + stringstream ss(step_string); + ss >> step; + string jump_str; + calc_jump(jump_str, step, lcmpoly); + cout << "jump polynomial:" << endl; + cout << jump_str << endl; + return 0; +} + + +static void read_file(GF2X& lcmpoly, long line_no, const string& file) +{ + ifstream ifs(file.c_str()); + string line; + for (int i = 0; i < line_no; i++) { + ifs >> line; + ifs >> line; + } + if (ifs) { + ifs >> line; + line = ""; + ifs >> line; + } + stringtopoly(lcmpoly, line); +} diff --git a/numpy/random/src/dsfmt/dSFMT-benchmark.c b/numpy/random/src/dsfmt/dSFMT-benchmark.c new file mode 100644 index 000000000..af29d0e1f --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-benchmark.c @@ -0,0 +1,43 @@ +/* + * + * cl dsfmt-benchmark.c dSFMT.c /Ox -DHAVE_SSE2 + * + * gcc dSFMT-benchmark.c dSFMT.c -O3 -DHAVE_SSE2 -DDSFMT_MEXP=19937 -o + * dSFMT-benchmark + */ +#include <inttypes.h> +#include <time.h> + +#include "dSFMT.h" + + +#define N 1000000000 + +int main() { + int i, j; + uint32_t seed = 0xDEADBEAF; + uint64_t count = 0, sum = 0; + dsfmt_t state; + double buffer[DSFMT_N64]; + + uint64_t out; + uint64_t *tmp; + dsfmt_init_gen_rand(&state, seed); + clock_t begin = clock(); + for (i = 0; i < N / (DSFMT_N64 / 2); i++) { + dsfmt_fill_array_close_open(&state, &buffer[0], DSFMT_N64); + for (j = 0; j < DSFMT_N64; j += 2) { + tmp = (uint64_t *)&buffer[j]; + out = (*tmp >> 16) << 32; + tmp = (uint64_t *)&buffer[j + 1]; + out |= (*tmp >> 16) & 0xffffffff; + sum += out; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp b/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp new file mode 100644 index 000000000..b960826be --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-calc-jump.hpp @@ -0,0 +1,106 @@ +#pragma once +#ifndef DSFMT_CALC_JUMP_HPP +#define DSFMT_CALC_JUMP_HPP +/** + * @file dSFMT-calc-jump.hpp + * + * @brief functions for calculating jump polynomial. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#include <iostream> +#include <iomanip> +#include <sstream> +#include <NTL/GF2X.h> + +namespace dsfmt { +/** + * converts polynomial to string for convenient use in C language. + * @param x output string + * @param polynomial input polynomial + */ + static inline void polytostring(std::string& x, NTL::GF2X& polynomial) + { + using namespace NTL; + using namespace std; + + long degree = deg(polynomial); + int buff; + stringstream ss; + for (int i = 0; i <= degree; i+=4) { + buff = 0; + for (int j = 0; j < 4; j++) { + if (IsOne(coeff(polynomial, i + j))) { + buff |= 1 << j; + } else { + buff &= (0x0f ^ (1 << j)); + } + } + ss << hex << buff; + } + ss << flush; + x = ss.str(); + } + +/** + * converts string to polynomial + * @param str string + * @param poly output polynomial + */ + static inline void stringtopoly(NTL::GF2X& poly, std::string& str) + { + using namespace NTL; + using namespace std; + + stringstream ss(str); + char c; + long p = 0; + clear(poly); + while(ss) { + ss >> c; + if (!ss) { + break; + } + if (c >= 'a') { + c = c - 'a' + 10; + } else { + c = c - '0'; + } + for (int j = 0; j < 4; j++) { + if (c & (1 << j)) { + SetCoeff(poly, p, 1); + } else { + SetCoeff(poly, p, 0); + } + p++; + } + } + } + +/** + * calculate the jump polynomial. + * SFMT generates 4 32-bit integers from one internal state. + * @param jump_str output string which represents jump polynomial. + * @param step jump step of internal state + * @param characteristic polynomial + */ + static inline void calc_jump(std::string& jump_str, + NTL::ZZ& step, + NTL::GF2X& characteristic) + { + using namespace NTL; + using namespace std; + GF2X jump; + PowerXMod(jump, step, characteristic); + polytostring(jump_str, jump); + } +} +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-common.h b/numpy/random/src/dsfmt/dSFMT-common.h new file mode 100644 index 000000000..30c26c08b --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-common.h @@ -0,0 +1,115 @@ +#pragma once +/** + * @file dSFMT-common.h + * + * @brief SIMD oriented Fast Mersenne Twister(SFMT) pseudorandom + * number generator with jump function. This file includes common functions + * used in random number generation and jump. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2006, 2007 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, Hiroshima + * University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#ifndef DSFMT_COMMON_H +#define DSFMT_COMMON_H + +#include "dSFMT.h" + +#if defined(HAVE_SSE2) +# include <emmintrin.h> +union X128I_T { + uint64_t u[2]; + __m128i i128; +}; +union X128D_T { + double d[2]; + __m128d d128; +}; +/** mask data for sse2 */ +static const union X128I_T sse2_param_mask = {{DSFMT_MSK1, DSFMT_MSK2}}; +#endif + +#if defined(HAVE_ALTIVEC) +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + const vector unsigned char sl1 = ALTI_SL1; + const vector unsigned char sl1_perm = ALTI_SL1_PERM; + const vector unsigned int sl1_msk = ALTI_SL1_MSK; + const vector unsigned char sr1 = ALTI_SR; + const vector unsigned char sr1_perm = ALTI_SR_PERM; + const vector unsigned int sr1_msk = ALTI_SR_MSK; + const vector unsigned char perm = ALTI_PERM; + const vector unsigned int msk1 = ALTI_MSK; + vector unsigned int w, x, y, z; + + z = a->s; + w = lung->s; + x = vec_perm(w, (vector unsigned int)perm, perm); + y = vec_perm(z, (vector unsigned int)sl1_perm, sl1_perm); + y = vec_sll(y, sl1); + y = vec_and(y, sl1_msk); + w = vec_xor(x, b->s); + w = vec_xor(w, y); + x = vec_perm(w, (vector unsigned int)sr1_perm, sr1_perm); + x = vec_srl(x, sr1); + x = vec_and(x, sr1_msk); + y = vec_and(w, msk1); + z = vec_xor(z, y); + r->s = vec_xor(z, x); + lung->s = w; +} +#elif defined(HAVE_SSE2) +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param d a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t *b, w128_t *u) { + __m128i v, w, x, y, z; + + x = a->si; + z = _mm_slli_epi64(x, DSFMT_SL1); + y = _mm_shuffle_epi32(u->si, SSE2_SHUFF); + z = _mm_xor_si128(z, b->si); + y = _mm_xor_si128(y, z); + + v = _mm_srli_epi64(y, DSFMT_SR); + w = _mm_and_si128(y, sse2_param_mask.i128); + v = _mm_xor_si128(v, x); + v = _mm_xor_si128(v, w); + r->si = v; + u->si = y; +} +#else +/** + * This function represents the recursion formula. + * @param r output 128-bit + * @param a a 128-bit part of the internal state array + * @param b a 128-bit part of the internal state array + * @param lung a 128-bit part of the internal state array (I/O) + */ +inline static void do_recursion(w128_t *r, w128_t *a, w128_t * b, + w128_t *lung) { + uint64_t t0, t1, L0, L1; + + t0 = a->u[0]; + t1 = a->u[1]; + L0 = lung->u[0]; + L1 = lung->u[1]; + lung->u[0] = (t0 << DSFMT_SL1) ^ (L1 >> 32) ^ (L1 << 32) ^ b->u[0]; + lung->u[1] = (t1 << DSFMT_SL1) ^ (L0 >> 32) ^ (L0 << 32) ^ b->u[1]; + r->u[0] = (lung->u[0] >> DSFMT_SR) ^ (lung->u[0] & DSFMT_MSK1) ^ t0; + r->u[1] = (lung->u[1] >> DSFMT_SR) ^ (lung->u[1] & DSFMT_MSK2) ^ t1; +} +#endif +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-jump.c b/numpy/random/src/dsfmt/dSFMT-jump.c new file mode 100644 index 000000000..1832bb885 --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-jump.c @@ -0,0 +1,184 @@ +/** + * @file dSFMT-jump.c + * + * @brief do jump using jump polynomial. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ + +#include <assert.h> +#include <stdlib.h> +#include <ctype.h> +#include <string.h> +#include "dSFMT-params.h" +#include "dSFMT.h" +#include "dSFMT-jump.h" +#include "dSFMT-common.h" + +#if defined(__cplusplus) +extern "C" { +#endif + + struct FIX_T { + int mexp; + uint64_t fix[4]; + }; + + struct FIX_T fix_table[] = { + {521, {UINT64_C(0x3fff56977f035125), + UINT64_C(0x3ff553857b015035), + UINT64_C(0x4034434434434434), + UINT64_C(0x0140151151351371)}}, + {1279, {UINT64_C(0x3ff87befce70e89f), + UINT64_C(0x3ff5f6afa3c60868), + UINT64_C(0xa4ca4caccaccacdb), + UINT64_C(0x40444444444c44c4)}}, + {4253, {UINT64_C(0x3ff85a66da51a81a), + UINT64_C(0x3ff4f4aeab9688eb), + UINT64_C(0x20524524534d34d3), + UINT64_C(0xc9cc9cc9cc9ccdcf)}}, + {216091, {UINT64_C(0x3ff096d54a871071), + UINT64_C(0x3ffafa9bfbd5d55d), + UINT64_C(0x0470470470573573), + UINT64_C(0x0250250251259259)}}, + {0} + }; + + inline static void next_state(dsfmt_t * dsfmt); + +#if defined(HAVE_SSE2) +/** + * add internal state of src to dest as F2-vector. + * @param dest destination state + * @param src source state + */ + inline static void add(dsfmt_t *dest, dsfmt_t *src) { + int dp = dest->idx / 2; + int sp = src->idx / 2; + int diff = (sp - dp + DSFMT_N) % DSFMT_N; + int p; + int i; + for (i = 0; i < DSFMT_N - diff; i++) { + p = i + diff; + dest->status[i].si + = _mm_xor_si128(dest->status[i].si, src->status[p].si); + } + for (i = DSFMT_N - diff; i < DSFMT_N; i++) { + p = i + diff - DSFMT_N; + dest->status[i].si + = _mm_xor_si128(dest->status[i].si, src->status[p].si); + } + dest->status[DSFMT_N].si + = _mm_xor_si128(dest->status[DSFMT_N].si, + src->status[DSFMT_N].si); + } +#else + inline static void add(dsfmt_t *dest, dsfmt_t *src) { + int dp = dest->idx / 2; + int sp = src->idx / 2; + int diff = (sp - dp + DSFMT_N) % DSFMT_N; + int p; + int i; + for (i = 0; i < DSFMT_N - diff; i++) { + p = i + diff; + dest->status[i].u[0] ^= src->status[p].u[0]; + dest->status[i].u[1] ^= src->status[p].u[1]; + } + for (; i < DSFMT_N; i++) { + p = i + diff - DSFMT_N; + dest->status[i].u[0] ^= src->status[p].u[0]; + dest->status[i].u[1] ^= src->status[p].u[1]; + } + dest->status[DSFMT_N].u[0] ^= src->status[DSFMT_N].u[0]; + dest->status[DSFMT_N].u[1] ^= src->status[DSFMT_N].u[1]; + } +#endif + +/** + * calculate next state + * @param dsfmt dSFMT internal state + */ + inline static void next_state(dsfmt_t * dsfmt) { + int idx = (dsfmt->idx / 2) % DSFMT_N; + w128_t * lung; + w128_t * pstate = &dsfmt->status[0]; + + lung = &pstate[DSFMT_N]; + do_recursion(&pstate[idx], + &pstate[idx], + &pstate[(idx + DSFMT_POS1) % DSFMT_N], + lung); + dsfmt->idx = (dsfmt->idx + 2) % DSFMT_N64; + } + + inline static void add_fix(dsfmt_t * dsfmt) { + int i; + int index = -1; + for (i = 0; fix_table[i].mexp != 0; i++) { + if (fix_table[i].mexp == DSFMT_MEXP) { + index = i; + } + if (fix_table[i].mexp > DSFMT_MEXP) { + break; + } + } + if (index < 0) { + return; + } + for (i = 0; i < DSFMT_N; i++) { + dsfmt->status[i].u[0] ^= fix_table[index].fix[0]; + dsfmt->status[i].u[1] ^= fix_table[index].fix[1]; + } + dsfmt->status[DSFMT_N].u[0] ^= fix_table[index].fix[2]; + dsfmt->status[DSFMT_N].u[1] ^= fix_table[index].fix[3]; + } + +/** + * jump ahead using jump_string + * @param dsfmt dSFMT internal state input and output. + * @param jump_string string which represents jump polynomial. + */ + void dSFMT_jump(dsfmt_t * dsfmt, const char * jump_string) { + dsfmt_t work; + int index = dsfmt->idx; + int bits; + int i; + int j; + memset(&work, 0, sizeof(dsfmt_t)); + add_fix(dsfmt); + dsfmt->idx = DSFMT_N64; + + for (i = 0; jump_string[i] != '\0'; i++) { + bits = jump_string[i]; + assert(isxdigit(bits)); + bits = tolower(bits); + if (bits >= 'a' && bits <= 'f') { + bits = bits - 'a' + 10; + } else { + bits = bits - '0'; + } + bits = bits & 0x0f; + for (j = 0; j < 4; j++) { + if ((bits & 1) != 0) { + add(&work, dsfmt); + } + next_state(dsfmt); + bits = bits >> 1; + } + } + *dsfmt = work; + add_fix(dsfmt); + dsfmt->idx = index; + } + +#if defined(__cplusplus) +} +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-jump.h b/numpy/random/src/dsfmt/dSFMT-jump.h new file mode 100644 index 000000000..689f9499a --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-jump.h @@ -0,0 +1,29 @@ +#pragma once +#ifndef DSFMT_JUMP_H +#define DSFMT_JUMP_H +/** + * @file SFMT-jump.h + * + * @brief jump header file. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (The University of Tokyo) + * + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The 3-clause BSD License is applied to this software, see + * LICENSE.txt + */ +#if defined(__cplusplus) +extern "C" { +#endif + +#include "dSFMT.h" +void dSFMT_jump(dsfmt_t *dsfmt, const char *jump_str); + +#if defined(__cplusplus) +} +#endif +#endif diff --git a/numpy/random/src/dsfmt/dSFMT-params.h b/numpy/random/src/dsfmt/dSFMT-params.h new file mode 100644 index 000000000..aa0247800 --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-params.h @@ -0,0 +1,87 @@ +#ifndef DSFMT_PARAMS_H +#define DSFMT_PARAMS_H + +#include "dSFMT.h" + +/*---------------------- + the parameters of DSFMT + following definitions are in dSFMT-paramsXXXX.h file. + ----------------------*/ +/** the pick up position of the array. +#define DSFMT_POS1 122 +*/ + +/** the parameter of shift left as four 32-bit registers. +#define DSFMT_SL1 18 + */ + +/** the parameter of shift right as four 32-bit registers. +#define DSFMT_SR1 12 +*/ + +/** A bitmask, used in the recursion. These parameters are introduced + * to break symmetry of SIMD. +#define DSFMT_MSK1 (uint64_t)0xdfffffefULL +#define DSFMT_MSK2 (uint64_t)0xddfecb7fULL +*/ + +/** These definitions are part of a 128-bit period certification vector. +#define DSFMT_PCV1 UINT64_C(0x00000001) +#define DSFMT_PCV2 UINT64_C(0x00000000) +*/ + +#define DSFMT_LOW_MASK UINT64_C(0x000FFFFFFFFFFFFF) +#define DSFMT_HIGH_CONST UINT64_C(0x3FF0000000000000) +#define DSFMT_SR 12 + +/* for sse2 */ +#if defined(HAVE_SSE2) + #define SSE2_SHUFF 0x1b +#elif defined(HAVE_ALTIVEC) + #if defined(__APPLE__) /* For OSX */ + #define ALTI_SR (vector unsigned char)(4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4) + #define ALTI_SR_PERM \ + (vector unsigned char)(15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14) + #define ALTI_SR_MSK \ + (vector unsigned int)(0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU) + #define ALTI_PERM \ + (vector unsigned char)(12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3) + #else + #define ALTI_SR {4,4,4,4,4,4,4,4,4,4,4,4,4,4,4,4} + #define ALTI_SR_PERM {15,0,1,2,3,4,5,6,15,8,9,10,11,12,13,14} + #define ALTI_SR_MSK {0x000fffffU,0xffffffffU,0x000fffffU,0xffffffffU} + #define ALTI_PERM {12,13,14,15,8,9,10,11,4,5,6,7,0,1,2,3} + #endif +#endif + +#if DSFMT_MEXP == 521 + #include "dSFMT-params521.h" +#elif DSFMT_MEXP == 1279 + #include "dSFMT-params1279.h" +#elif DSFMT_MEXP == 2203 + #include "dSFMT-params2203.h" +#elif DSFMT_MEXP == 4253 + #include "dSFMT-params4253.h" +#elif DSFMT_MEXP == 11213 + #include "dSFMT-params11213.h" +#elif DSFMT_MEXP == 19937 + #include "dSFMT-params19937.h" +#elif DSFMT_MEXP == 44497 + #include "dSFMT-params44497.h" +#elif DSFMT_MEXP == 86243 + #include "dSFMT-params86243.h" +#elif DSFMT_MEXP == 132049 + #include "dSFMT-params132049.h" +#elif DSFMT_MEXP == 216091 + #include "dSFMT-params216091.h" +#else +#ifdef __GNUC__ + #error "DSFMT_MEXP is not valid." + #undef DSFMT_MEXP +#else + #undef DSFMT_MEXP +#endif + +#endif + +#endif /* DSFMT_PARAMS_H */ diff --git a/numpy/random/src/dsfmt/dSFMT-params19937.h b/numpy/random/src/dsfmt/dSFMT-params19937.h new file mode 100644 index 000000000..a600b0dbc --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-params19937.h @@ -0,0 +1,40 @@ +#ifndef DSFMT_PARAMS19937_H +#define DSFMT_PARAMS19937_H + +/* #define DSFMT_N 191 */ +/* #define DSFMT_MAXDEGREE 19992 */ +#define DSFMT_POS1 117 +#define DSFMT_SL1 19 +#define DSFMT_MSK1 UINT64_C(0x000ffafffffffb3f) +#define DSFMT_MSK2 UINT64_C(0x000ffdfffc90fffd) +#define DSFMT_MSK32_1 0x000ffaffU +#define DSFMT_MSK32_2 0xfffffb3fU +#define DSFMT_MSK32_3 0x000ffdffU +#define DSFMT_MSK32_4 0xfc90fffdU +#define DSFMT_FIX1 UINT64_C(0x90014964b32f4329) +#define DSFMT_FIX2 UINT64_C(0x3b8d12ac548a7c7a) +#define DSFMT_PCV1 UINT64_C(0x3d84e1ac0dc82880) +#define DSFMT_PCV2 UINT64_C(0x0000000000000001) +#define DSFMT_IDSTR "dSFMT2-19937:117-19:ffafffffffb3f-ffdfffc90fffd" + + +/* PARAMETERS FOR ALTIVEC */ +#if defined(__APPLE__) /* For OSX */ + #define ALTI_SL1 (vector unsigned char)(3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3) + #define ALTI_SL1_PERM \ + (vector unsigned char)(2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1) + #define ALTI_SL1_MSK \ + (vector unsigned int)(0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U) + #define ALTI_MSK (vector unsigned int)(DSFMT_MSK32_1, \ + DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4) +#else /* For OTHER OSs(Linux?) */ + #define ALTI_SL1 {3,3,3,3,3,3,3,3,3,3,3,3,3,3,3,3} + #define ALTI_SL1_PERM \ + {2,3,4,5,6,7,30,30,10,11,12,13,14,15,0,1} + #define ALTI_SL1_MSK \ + {0xffffffffU,0xfff80000U,0xffffffffU,0xfff80000U} + #define ALTI_MSK \ + {DSFMT_MSK32_1, DSFMT_MSK32_2, DSFMT_MSK32_3, DSFMT_MSK32_4} +#endif + +#endif /* DSFMT_PARAMS19937_H */ diff --git a/numpy/random/src/dsfmt/dSFMT-poly.h b/numpy/random/src/dsfmt/dSFMT-poly.h new file mode 100644 index 000000000..f8e15c3eb --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-poly.h @@ -0,0 +1,53 @@ +static const char * poly_128 = +"f4dfa6c62049d0776e0bf6f1e953f3aa38abb113df86be024eab3773ad5f2b82ead936022e656dff7e562691c59dd5f7d2" +"566b78d9669002503c4ddb1888a49f32333f515e6c60c4ecd221078ec6f26f0a90f4875067ca1f399a99775037adf90556" +"6e2c7e6b42131420f8f04f112c92621c9b1502f2a8aefad6c667904af62f0d55e02d396902d3b89450103c5ce5fe0408d9" +"7cbb864861b49e4e42048ff3310b48faac55095a7f422eea4aade752f947f947c6be0a0c665bdea099246ab9eff658ea8c" +"a468bf49d0227748367878de06d7bd86ea6708fcac6e252f5f00f04309b2aac3036b64afb39d990427c6c9f03477cc7e93" +"5c43c0e61bc161db8eb15516eee8cb377ecbc1849207990fb6778721b29bfe0d89bfda1b3772fa5b0b1f7ec3daf3605203" +"2285898c6f6396f55010c31f8201b7e2e51d94f920bfe57684c5415cc342cb39a0045d9793d13cf8646096daeb8bb9bfc2" +"0a90de8f2426da8733267a9b9674f32154e8f84a9932223a2ca3c787d0b66df6675febbdfcba2f9cef09c621c57e11098b" +"3289c77397aaae8b104642ffe0c4b75598efbc53745984d68b4d6656cae299ae2be55217a9a02b009ca7be32f47fbe434b" +"ce4914a34d0c9b0085bede9b8a99319c34660d66f0124b5a7714c4bf3cbfec3ee43ed817087168bad80133bebaeeb68cf7" +"929a24d1bb3de831a8340d220906ab04159cf94b21d5ee813bd7c80f10f01b43052af530917513b169254c25d6fcfe6cb4" +"20d6ce92f54886ef6eaf9a5ba35e893ff593834d05ddf28899e42d729c7df3d21ef036020789739366f0c11ec52ff92a0b" +"fd8ba69508e27b20fabb8217bd36b90e5aa918159ac87913bc7b46c04e366c23c92807fbe9c6a407e6a4db0b4fc23c3b6c" +"706b5ca058fe8c190f849f18d16d6b48b5ed760eb202fd566291a799420b9654e08b8118bcbfead8e9dd2fdb9b053e9bdf" +"b665285c78718f726d0b3d6c37e116428ec9ac9db2637259e4e8d6402bbada46c6bdb03985e19a82e9b4e57de1b025a3cb" +"1f850beae7e8da9941655825bce0e89d536b6ee9064865b1a85c185e9fc9cb7f435de13d44773c00eed442a286e4ab807e" +"3cab4dc3441d1b7d2af693812ae8b39652bb8c835fc895d13d6da93541afeadeee450475c29f3b2dfa8ef1c1e2547463b2" +"cc2f0ff7a42ac4dd35e25c4fa030d2d2766fbe9f2d04c1304671747bace2f7dd55142bfa60f8cbc968bfc3d7a342152dc6" +"84a0fb5a32c0962a62b5220ac0f72add9d8b84d6cc76b97d03245e01fc8da3414a49bb4075d3488f29b56dc42ba69e3b58" +"529448c943ecfd98b3784a39d0b8609a8fb945e757f4569f53bd2cf80f7f638acf5b67fe9c560a3b7b0cf7e0398f31aa8b" +"03cf9c62b24296b6d8596b694469a02686c38daa16a1ef86e012d61a2f7de1693a5c00b3685175caec3c67146477eba548" +"30f1d546cb18a553779aa46adb4f2010e33f3def847c7d89b51a8462b227605f6c920fd558a6daf64bc98682e508ae960c" +"0c571870e603ba1fce0c13d53176f353fd319959e13db93eae1359f06e3dd4767c04f824cf34ec7bf8f60161ba1a615db8" +"2852eca9e3869afa711ab9a090660b0dc6cfbea310dda77e02310fbaeacd2636f975838c2dbcdbe9ac2cd85cee28f5e3f0" +"c73abf62f9fa02cd79a7606b7ba855db68a07848b057c3aaf38f1a70086e14616f6f88305a1f9ce6b41378a620d4db3e0e" +"7e1d421590dccaeff86212e232eeb5eb8a8d33a8c9b25ae88f3a7bd5032b4efa68f8af3186a02ffcbf5456f12beccace94" +"c81c360cc4a0dcc642b59f991eec68c59af78139ca60b96d6a18e9535f8995e89bd2cf6a0aef3acffd33d1c0c1b79b6641" +"4a91d9f65b2b4ec65844b96f725d2b4b0c309f3eb9d714e9dd939bbdfd85ce8fb43679aeab13f6c29549949503c9466dbd" +"337c4cdde46d6eacd15f21f4d8fdeaa627a47884c88a9c85f0b731d271a8ea7cb9e04a4a149c23c10f56b3a0476dc77a99" +"9d6e4f813e4b0f805e2a693e2ae4ae0ecc423c9ba5d17b42e691abf83784a582f2b1fd85d1e0a27ba38a500963568b2450" +"363d2c5e3f7b8ba3e5b56e4e9f745a3a710bf2ae233c303068c532ce78ff031e6ab28b705dd94d7db4500909edb5626b8c" +"9bd5ff4f0b4741388f0b91563ee516934c013e901572cba005ac5c535f4f107903be9af7b2793dfb61b5070facbe71eefe" +"1b5600f975c8c38c3a2350d78beadfecb78e981164ae8bc866e732972d3ceef4aac68e15861f9b881d9b51b4edece150bc" +"124b07645defb4202ef5d0e0962db98cae6ed459561c93c74c20bd64362e4f4fffc389a6cd80514604ff22eecc10c9cbc7" +"981d19a8102b24146354c463107c9dc070e29e70df3578022acf72289ef071ab9f9402a544d0399f1b1e5f206b6d46d445" +"f6d612a490e72918e00c853eda8493bef511149e80c9ab56e8b4b8cba3987249f77d060e61760e5792ac321c987c03c260" +"6e9393a7970212992cdbd16448078d5039d4c2c3199714f53278f4f7b1d2e514cf95bdfc078b8bb0db659cb2c3f5cc0289" +"0ea84f05d414c88d2db9e9f8455659b9fa6254405317245fa070d6970cafb4dadb2522b490a5c8e02fe973a8cdbfbfbdbf" +"b01535099ffba3d3896bc4d1189fc570c3e6fdc6469265b8da912772e75dd62ab71be507f700d56cac5e68fd6b57ec1661" +"68ab5258a69625c142a5b1b3519f94be1bde5e51d3bd8ea0c12d5af2fe4615b1b7bd4a96628a4fabc65925ff09718f63bb" +"ebaad98f89bd9543a27b3ff3b5d8bfa89f941a5eb8cc005ccd4a705190e1c9dc6a9f4264e5ee658520a4438e92de854bff" +"c39f8dc7dfbb5de4f14ba63ea16a37d14a7b4610f95b6cffd55e4679b29cedbdf20e7bd16da822fad910c359ee3a68e48a" +"ae6e769b0e291d5d3aa3e2ca9d8d23abe8a1d5349f4991e9300852cc0befb20c2fc0d169306b260763344024f8092cbcc2" +"4c6807363e9fc548a30d5faab3a94b2af0782a2942be80c45d8b0587efd587394ef33c33022436e285806ddffdd32fe363" +"45c3c38ed8d680abeb7a028b44ee6f94d060a14c7019bb6af1f1b5f0a562957d19826d8cc216f9b908c989ccd5415e3525" +"dfe9422ffb5b50b7cc3083dc325544751e5683535d7439d3da2b0bb73bea551dd99e04e0e793804f4774eb6b1daf781d9c" +"aa5128274e599e847862fe309027813d3e4eda0bbeb7201856a5c5d8370e44dabff0bb229c723ba0a6bcf29c44536147de" +"11b7835991018100105bd4329217f7386903fe8e7363cd7b3e893244e245e0a187467664c05b0be1fd429722b9b9a5e319" +"8147fad72776e8a63aab9054fa9d259af0198d088d71d132e6068676a8e9ebb0f616b51ee34aac39c2c2221c7112401727" +"0d75ff4a048363c389e04e9b440ad2032a381ac2cfc54f409caa791e65ee4f5d6cd035008f219b88a803a7382ae447bf65" +"a3df2176b25b3b7b67dabe34decd9a1384dc7a003916ca8fbcb29b3ad6fd8eac5bbbaa3bdfa6c6a3ad9427c4f3ed79fea2" +"6e14c8ce5fa3b4f82c5f7b6d2125916753a7b92ce9b46d45";
\ No newline at end of file diff --git a/numpy/random/src/dsfmt/dSFMT-test-gen.c b/numpy/random/src/dsfmt/dSFMT-test-gen.c new file mode 100644 index 000000000..697a3010a --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT-test-gen.c @@ -0,0 +1,58 @@ +/* + * cl dSFMT-test-gen.c dSFMT.c -DHAVE_SSE2 -DDSFMT_MEXP=19937 /Ox + * + * gcc dSFMT-test-gen.c dSFMT.c -DHAVE_SSE2 -DDSFMT_MEXP=19937 -o dSFMT + */ + +#include <inttypes.h> +#include <stdio.h> + +#include "dSFMT.h" + + +int main(void) { + int i; + double d; + uint64_t *temp; + uint32_t seed = 1UL; + dsfmt_t state; + dsfmt_init_gen_rand(&state, seed); + double out[1000]; + dsfmt_fill_array_close1_open2(&state, out, 1000); + + FILE *fp; + fp = fopen("dSFMT-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, %" PRIu32 "\n", seed); + for (i = 0; i < 1000; i++) { + d = out[i]; + temp = (uint64_t *)&d; + fprintf(fp, "%d, %" PRIu64 "\n", i, *temp); + if (i==999) { + printf("%d, %" PRIu64 "\n", i, *temp); + } + } + fclose(fp); + + seed = 123456789UL; + dsfmt_init_gen_rand(&state, seed); + dsfmt_fill_array_close1_open2(&state, out, 1000); + fp = fopen("dSFMT-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, %" PRIu32 "\n", seed); + for (i = 0; i < 1000; i++) { + d = out[i]; + temp = (uint64_t *)&d; + fprintf(fp, "%d, %" PRIu64 "\n", i, *temp); + if (i==999) { + printf("%d, %" PRIu64 "\n", i, *temp); + } + } + fclose(fp); +} diff --git a/numpy/random/src/dsfmt/dSFMT.c b/numpy/random/src/dsfmt/dSFMT.c new file mode 100644 index 000000000..0f122c26c --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT.c @@ -0,0 +1,626 @@ +/** + * @file dSFMT.c + * @brief double precision SIMD-oriented Fast Mersenne Twister (dSFMT) + * based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007,2008 Mutsuo Saito, Makoto Matsumoto and Hiroshima + * University. All rights reserved. + * + * The new BSD License is applied to this software, see LICENSE.txt + */ +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "dSFMT-params.h" + +#include "dSFMT-common.h" +#include "dSFMT-jump.h" +#include "dSFMT-poly.h" + +#if defined(__cplusplus) +extern "C" { +#endif + +/** dsfmt internal state vector */ +dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +static const int dsfmt_mexp = DSFMT_MEXP; + +/*---------------- + STATIC FUNCTIONS + ----------------*/ +inline static uint32_t ini_func1(uint32_t x); +inline static uint32_t ini_func2(uint32_t x); +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, int size); +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, int size); +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, int size); +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, int size); +inline static int idxof(int i); +static void initial_mask(dsfmt_t *dsfmt); +static void period_certification(dsfmt_t *dsfmt); + +#if defined(HAVE_SSE2) +/** 1 in 64bit for sse2 */ +static const union X128I_T sse2_int_one = {{1, 1}}; +/** 2.0 double for sse2 */ +static const union X128D_T sse2_double_two = {{2.0, 2.0}}; +/** -1.0 double for sse2 */ +static const union X128D_T sse2_double_m_one = {{-1.0, -1.0}}; +#endif + +/** + * This function simulate a 32-bit array index overlapped to 64-bit + * array of LITTLE ENDIAN in BIG ENDIAN machine. + */ +#if defined(DSFMT_BIG_ENDIAN) +inline static int idxof(int i) { return i ^ 1; } +#else +inline static int idxof(int i) { return i; } +#endif + +#if defined(HAVE_SSE2) +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->sd = _mm_sub_pd(sse2_double_two.d128, w->sd); +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->si = _mm_or_si128(w->si, sse2_int_one.i128); + w->sd = _mm_add_pd(w->sd, sse2_double_m_one.d128); +} +#else /* standard C and altivec */ +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range [0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_c0o1(w128_t *w) { + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1]. + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0c1(w128_t *w) { + w->d[0] = 2.0 - w->d[0]; + w->d[1] = 2.0 - w->d[1]; +} + +/** + * This function converts the double precision floating point numbers which + * distribute uniformly in the range [1, 2) to those which distribute uniformly + * in the range (0, 1). + * @param w 128bit stracture of double precision floating point numbers (I/O) + */ +inline static void convert_o0o1(w128_t *w) { + w->u[0] |= 1; + w->u[1] |= 1; + w->d[0] -= 1.0; + w->d[1] -= 1.0; +} +#endif + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c1o2(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1], + &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N], + &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_c0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1], + &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N], + &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_c0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_c0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_c0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0o1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1], + &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N], + &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0o1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0o1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0o1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function fills the user-specified array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + * @param array an 128-bit array to be filled by pseudorandom numbers. + * @param size number of 128-bit pseudorandom numbers to be generated. + */ +inline static void gen_rand_array_o0c1(dsfmt_t *dsfmt, w128_t *array, + int size) { + int i, j; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&array[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&array[i], &dsfmt->status[i], &dsfmt->status[i + DSFMT_POS1], + &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&array[i], &dsfmt->status[i], &array[i + DSFMT_POS1 - DSFMT_N], + &lung); + } + for (; i < size - DSFMT_N; i++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + convert_o0c1(&array[i - DSFMT_N]); + } + for (j = 0; j < 2 * DSFMT_N - size; j++) { + dsfmt->status[j] = array[j + size - DSFMT_N]; + } + for (; i < size; i++, j++) { + do_recursion(&array[i], &array[i - DSFMT_N], + &array[i + DSFMT_POS1 - DSFMT_N], &lung); + dsfmt->status[j] = array[i]; + convert_o0c1(&array[i - DSFMT_N]); + } + for (i = size - DSFMT_N; i < size; i++) { + convert_o0c1(&array[i]); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func1(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1664525UL; +} + +/** + * This function represents a function used in the initialization + * by init_by_array + * @param x 32-bit integer + * @return 32-bit integer + */ +static uint32_t ini_func2(uint32_t x) { + return (x ^ (x >> 27)) * (uint32_t)1566083941UL; +} + +/** + * This function initializes the internal state array to fit the IEEE + * 754 format. + * @param dsfmt dsfmt state vector. + */ +static void initial_mask(dsfmt_t *dsfmt) { + int i; + uint64_t *psfmt; + + psfmt = &dsfmt->status[0].u[0]; + for (i = 0; i < DSFMT_N * 2; i++) { + psfmt[i] = (psfmt[i] & DSFMT_LOW_MASK) | DSFMT_HIGH_CONST; + } +} + +/** + * This function certificate the period of 2^{SFMT_MEXP}-1. + * @param dsfmt dsfmt state vector. + */ +static void period_certification(dsfmt_t *dsfmt) { + uint64_t pcv[2] = {DSFMT_PCV1, DSFMT_PCV2}; + uint64_t tmp[2]; + uint64_t inner; + int i; +#if (DSFMT_PCV2 & 1) != 1 + int j; + uint64_t work; +#endif + + tmp[0] = (dsfmt->status[DSFMT_N].u[0] ^ DSFMT_FIX1); + tmp[1] = (dsfmt->status[DSFMT_N].u[1] ^ DSFMT_FIX2); + + inner = tmp[0] & pcv[0]; + inner ^= tmp[1] & pcv[1]; + for (i = 32; i > 0; i >>= 1) { + inner ^= inner >> i; + } + inner &= 1; + /* check OK */ + if (inner == 1) { + return; + } + /* check NG, and modification */ +#if (DSFMT_PCV2 & 1) == 1 + dsfmt->status[DSFMT_N].u[1] ^= 1; +#else + for (i = 1; i >= 0; i--) { + work = 1; + for (j = 0; j < 64; j++) { + if ((work & pcv[i]) != 0) { + dsfmt->status[DSFMT_N].u[i] ^= work; + return; + } + work = work << 1; + } + } +#endif + return; +} + +/*---------------- + PUBLIC FUNCTIONS + ----------------*/ +/** + * This function returns the identification string. The string shows + * the Mersenne exponent, and all parameters of this generator. + * @return id string. + */ +const char *dsfmt_get_idstring(void) { return DSFMT_IDSTR; } + +/** + * This function returns the minimum size of array used for \b + * fill_array functions. + * @return minimum size of array used for fill_array functions. + */ +int dsfmt_get_min_array_size(void) { return DSFMT_N64; } + +/** + * This function fills the internal state array with double precision + * floating point pseudorandom numbers of the IEEE 754 format. + * @param dsfmt dsfmt state vector. + */ +void dsfmt_gen_rand_all(dsfmt_t *dsfmt) { + int i; + w128_t lung; + + lung = dsfmt->status[DSFMT_N]; + do_recursion(&dsfmt->status[0], &dsfmt->status[0], &dsfmt->status[DSFMT_POS1], + &lung); + for (i = 1; i < DSFMT_N - DSFMT_POS1; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1], &lung); + } + for (; i < DSFMT_N; i++) { + do_recursion(&dsfmt->status[i], &dsfmt->status[i], + &dsfmt->status[i + DSFMT_POS1 - DSFMT_N], &lung); + } + dsfmt->status[DSFMT_N] = lung; +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. The number of pseudorandom numbers + * is specified by the argument \b size, which must be at least (SFMT_MEXP + * / 128) * 2 and a multiple of two. The function + * get_min_array_size() returns this minimum size. The generation by + * this function is much faster than the following fill_array_xxx functions. + * + * For initialization, init_gen_rand() or init_by_array() must be called + * before the first call of this function. This function can not be + * used after calling genrand_xxx functions, without initialization. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. The pointer to the array must be "aligned" + * (namely, must be a multiple of 16) in the SIMD version, since it + * refers to the address of a 128-bit integer. In the standard C + * version, the pointer is arbitrary. + * + * @param size the number of 64-bit pseudorandom integers to be + * generated. size must be a multiple of 2, and greater than or equal + * to (SFMT_MEXP / 128) * 2. + * + * @note \b memalign or \b posix_memalign is available to get aligned + * memory. Mac OSX doesn't have these functions, but \b malloc of OSX + * returns the pointer to the aligned memory block. + */ +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c1o2(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0c1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param dsfmt dsfmt state vector. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_c0o1(dsfmt, (w128_t *)array, size / 2); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * fill_array_close1_open2() except the distribution range. + * + * @param dsfmt dsfmt state vector. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa fill_array_close1_open2() + */ +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size) { + assert(size % 2 == 0); + assert(size >= DSFMT_N64); + gen_rand_array_o0o1(dsfmt, (w128_t *)array, size / 2); +} + +#if defined(__INTEL_COMPILER) +#pragma warning(disable : 981) +#endif +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp) { + int i; + uint32_t *psfmt; + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + psfmt = &dsfmt->status[0].u32[0]; + psfmt[idxof(0)] = seed; + for (i = 1; i < (DSFMT_N + 1) * 4; i++) { + psfmt[idxof(i)] = + 1812433253UL * (psfmt[idxof(i - 1)] ^ (psfmt[idxof(i - 1)] >> 30)) + i; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds + * @param dsfmt dsfmt state vector. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * @param mexp caller's mersenne expornent + */ +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp) { + int i, j, count; + uint32_t r; + uint32_t *psfmt32; + int lag; + int mid; + int size = (DSFMT_N + 1) * 4; /* pulmonary */ + + /* make sure caller program is compiled with the same MEXP */ + if (mexp != dsfmt_mexp) { + fprintf(stderr, "DSFMT_MEXP doesn't match with dSFMT.c\n"); + exit(1); + } + if (size >= 623) { + lag = 11; + } else if (size >= 68) { + lag = 7; + } else if (size >= 39) { + lag = 5; + } else { + lag = 3; + } + mid = (size - lag) / 2; + + psfmt32 = &dsfmt->status[0].u32[0]; + memset(dsfmt->status, 0x8b, sizeof(dsfmt->status)); + if (key_length + 1 > size) { + count = key_length + 1; + } else { + count = size; + } + r = ini_func1(psfmt32[idxof(0)] ^ psfmt32[idxof(mid % size)] ^ + psfmt32[idxof((size - 1) % size)]); + psfmt32[idxof(mid % size)] += r; + r += key_length; + psfmt32[idxof((mid + lag) % size)] += r; + psfmt32[idxof(0)] = r; + count--; + for (i = 1, j = 0; (j < count) && (j < key_length); j++) { + r = ini_func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % size)] ^ + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += init_key[j] + i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (; j < count; j++) { + r = ini_func1(psfmt32[idxof(i)] ^ psfmt32[idxof((i + mid) % size)] ^ + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] += r; + r += i; + psfmt32[idxof((i + mid + lag) % size)] += r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + for (j = 0; j < size; j++) { + r = ini_func2(psfmt32[idxof(i)] + psfmt32[idxof((i + mid) % size)] + + psfmt32[idxof((i + size - 1) % size)]); + psfmt32[idxof((i + mid) % size)] ^= r; + r -= i; + psfmt32[idxof((i + mid + lag) % size)] ^= r; + psfmt32[idxof(i)] = r; + i = (i + 1) % size; + } + initial_mask(dsfmt); + period_certification(dsfmt); + dsfmt->idx = DSFMT_N64; +} +#if defined(__INTEL_COMPILER) +#pragma warning(default : 981) +#endif + +#if defined(__cplusplus) +} +#endif + +extern inline double dsfmt_next_double(dsfmt_state *state); + +extern inline uint64_t dsfmt_next64(dsfmt_state *state); + +extern inline uint32_t dsfmt_next32(dsfmt_state *state); + +void dsfmt_jump(dsfmt_state *state) { dSFMT_jump(state->state, poly_128); };
\ No newline at end of file diff --git a/numpy/random/src/dsfmt/dSFMT.h b/numpy/random/src/dsfmt/dSFMT.h new file mode 100644 index 000000000..224d0108f --- /dev/null +++ b/numpy/random/src/dsfmt/dSFMT.h @@ -0,0 +1,691 @@ +#pragma once +/** + * @file dSFMT.h + * + * @brief double precision SIMD oriented Fast Mersenne Twister(dSFMT) + * pseudorandom number generator based on IEEE 754 format. + * + * @author Mutsuo Saito (Hiroshima University) + * @author Makoto Matsumoto (Hiroshima University) + * + * Copyright (C) 2007, 2008 Mutsuo Saito, Makoto Matsumoto and + * Hiroshima University. All rights reserved. + * Copyright (C) 2012 Mutsuo Saito, Makoto Matsumoto, + * Hiroshima University and The University of Tokyo. + * All rights reserved. + * + * The new BSD License is applied to this software. + * see LICENSE.txt + * + * @note We assume that your system has inttypes.h. If your system + * doesn't have inttypes.h, you have to typedef uint32_t and uint64_t, + * and you have to define PRIu64 and PRIx64 in this file as follows: + * @verbatim + typedef unsigned int uint32_t + typedef unsigned long long uint64_t + #define PRIu64 "llu" + #define PRIx64 "llx" +@endverbatim + * uint32_t must be exactly 32-bit unsigned integer type (no more, no + * less), and uint64_t must be exactly 64-bit unsigned integer type. + * PRIu64 and PRIx64 are used for printf function to print 64-bit + * unsigned int and 64-bit unsigned int in hexadecimal format. + */ + +#ifndef DSFMT_H +#define DSFMT_H +#if defined(__cplusplus) +extern "C" { +#endif + +#include <assert.h> +#include <stdio.h> + +#if !defined(DSFMT_MEXP) +#ifdef __GNUC__ +#warning "DSFMT_MEXP is not defined. I assume DSFMT_MEXP is 19937." +#endif +#define DSFMT_MEXP 19937 +#endif +/*----------------- + BASIC DEFINITIONS + -----------------*/ +/* Mersenne Exponent. The period of the sequence + * is a multiple of 2^DSFMT_MEXP-1. + * #define DSFMT_MEXP 19937 */ +/** DSFMT generator has an internal state array of 128-bit integers, + * and N is its size. */ +#define DSFMT_N ((DSFMT_MEXP - 128) / 104 + 1) +/** N32 is the size of internal state array when regarded as an array + * of 32-bit integers.*/ +#define DSFMT_N32 (DSFMT_N * 4) +/** N64 is the size of internal state array when regarded as an array + * of 64-bit integers.*/ +#define DSFMT_N64 (DSFMT_N * 2) + +#if !defined(DSFMT_BIG_ENDIAN) +#if defined(__BYTE_ORDER) && defined(__BIG_ENDIAN) +#if __BYTE_ORDER == __BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(_BYTE_ORDER) && defined(_BIG_ENDIAN) +#if _BYTE_ORDER == _BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(__BYTE_ORDER__) && defined(__BIG_ENDIAN__) +#if __BYTE_ORDER__ == __BIG_ENDIAN__ +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(BYTE_ORDER) && defined(BIG_ENDIAN) +#if BYTE_ORDER == BIG_ENDIAN +#define DSFMT_BIG_ENDIAN 1 +#endif +#elif defined(__BIG_ENDIAN) || defined(_BIG_ENDIAN) || \ + defined(__BIG_ENDIAN__) || defined(BIG_ENDIAN) +#define DSFMT_BIG_ENDIAN 1 +#endif +#endif + +#if defined(DSFMT_BIG_ENDIAN) && defined(__amd64) +#undef DSFMT_BIG_ENDIAN +#endif + +#if defined(__STDC_VERSION__) && (__STDC_VERSION__ >= 199901L) +#include <inttypes.h> +#elif defined(_MSC_VER) || defined(__BORLANDC__) +#if !defined(DSFMT_UINT32_DEFINED) && !defined(SFMT_UINT32_DEFINED) +typedef unsigned int uint32_t; +typedef unsigned __int64 uint64_t; +#ifndef UINT64_C +#define UINT64_C(v) (v##ui64) +#endif +#define DSFMT_UINT32_DEFINED +#if !defined(inline) && !defined(__cplusplus) +#define inline __forceinline +#endif +#endif +#else +#include <inttypes.h> +#if !defined(inline) && !defined(__cplusplus) +#if defined(__GNUC__) +#define inline __forceinline__ +#else +#define inline +#endif +#endif +#endif + +#ifndef PRIu64 +#if defined(_MSC_VER) || defined(__BORLANDC__) +#define PRIu64 "I64u" +#define PRIx64 "I64x" +#else +#define PRIu64 "llu" +#define PRIx64 "llx" +#endif +#endif + +#ifndef UINT64_C +#define UINT64_C(v) (v##ULL) +#endif + +/*------------------------------------------ + 128-bit SIMD like data type for standard C + ------------------------------------------*/ +#if defined(HAVE_ALTIVEC) +#if !defined(__APPLE__) +#include <altivec.h> +#endif +/** 128-bit data structure */ +union W128_T { + vector unsigned int s; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; + +#elif defined(HAVE_SSE2) +#include <emmintrin.h> + +/** 128-bit data structure */ +union W128_T { + __m128i si; + __m128d sd; + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#else /* standard C */ +/** 128-bit data structure */ +union W128_T { + uint64_t u[2]; + uint32_t u32[4]; + double d[2]; +}; +#endif + +/** 128-bit data type */ +typedef union W128_T w128_t; + +/** the 128-bit internal state array */ +struct DSFMT_T { + w128_t status[DSFMT_N + 1]; + int idx; +}; +typedef struct DSFMT_T dsfmt_t; + +/** dsfmt internal state vector */ +extern dsfmt_t dsfmt_global_data; +/** dsfmt mexp for check */ +extern const int dsfmt_global_mexp; + +void dsfmt_gen_rand_all(dsfmt_t *dsfmt); +void dsfmt_fill_array_open_close(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_open_open(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_fill_array_close1_open2(dsfmt_t *dsfmt, double array[], int size); +void dsfmt_chk_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed, int mexp); +void dsfmt_chk_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length, int mexp); +const char *dsfmt_get_idstring(void); +int dsfmt_get_min_array_size(void); + +#if defined(__GNUC__) +#define DSFMT_PRE_INLINE inline static +#define DSFMT_PST_INLINE __attribute__((always_inline)) +#elif defined(_MSC_VER) && _MSC_VER >= 1200 +#define DSFMT_PRE_INLINE __forceinline static +#define DSFMT_PST_INLINE +#else +#define DSFMT_PRE_INLINE inline static +#define DSFMT_PST_INLINE +#endif +DSFMT_PRE_INLINE uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_close_open(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_open_close(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double +dsfmt_genrand_open_open(dsfmt_t *dsfmt) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE uint32_t dsfmt_gv_genrand_uint32(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double dsfmt_gv_genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_close(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_close_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_fill_array_open_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void +dsfmt_gv_fill_array_close1_open2(double array[], int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_gv_init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_gen_rand(dsfmt_t *dsfmt, + uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_genrand_uint32(dsfmt_t *dsfmt) { + uint32_t r; + uint64_t *psfmt64 = &dsfmt->status[0].u[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++] & 0xffffffffU; + return r; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). This is + * the primitive and faster than generating numbers in other ranges. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close1_open2(dsfmt_t *dsfmt) { + double r; + double *psfmt64 = &dsfmt->status[0].d[0]; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r = psfmt64[dsfmt->idx++]; + return r; +} + +/** + * This function generates and returns unsigned 32-bit integer. + * This is slower than SFMT, only for convenience usage. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static uint32_t dsfmt_gv_genrand_uint32(void) { + return dsfmt_genrand_uint32(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [1, 2). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close1_open2(void) { + return dsfmt_genrand_close1_open2(&dsfmt_global_data); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_close_open(dsfmt_t *dsfmt) { + return dsfmt_genrand_close1_open2(dsfmt) - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range [0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_close_open(void) { + return dsfmt_gv_genrand_close1_open2() - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_close(dsfmt_t *dsfmt) { + return 2.0 - dsfmt_genrand_close1_open2(dsfmt); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1]. + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_close(void) { + return 2.0 - dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_init_gen_rand() or dsfmt_init_by_array() must be called + * before this function. + * @param dsfmt dsfmt internal state date + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_genrand_open_open(dsfmt_t *dsfmt) { + double *dsfmt64 = &dsfmt->status[0].d[0]; + union { + double d; + uint64_t u; + } r; + + if (dsfmt->idx >= DSFMT_N64) { + dsfmt_gen_rand_all(dsfmt); + dsfmt->idx = 0; + } + r.d = dsfmt64[dsfmt->idx++]; + r.u |= 1; + return r.d - 1.0; +} + +/** + * This function generates and returns double precision pseudorandom + * number which distributes uniformly in the range (0, 1). + * dsfmt_gv_init_gen_rand() or dsfmt_gv_init_by_array() must be called + * before this function. This function uses \b global variables. + * @return double precision floating point pseudorandom number + */ +inline static double dsfmt_gv_genrand_open_open(void) { + return dsfmt_genrand_open_open(&dsfmt_global_data); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [1, 2) to the + * specified array[] by one call. This function is the same as + * dsfmt_fill_array_close1_open2() except that this function uses + * \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close1_open2(double array[], int size) { + dsfmt_fill_array_close1_open2(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1] to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() and \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_close(double array[], int size) { + dsfmt_fill_array_open_close(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range [0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_close_open(double array[], int size) { + dsfmt_fill_array_close_open(&dsfmt_global_data, array, size); +} + +/** + * This function generates double precision floating point + * pseudorandom numbers which distribute in the range (0, 1) to the + * specified array[] by one call. This function is the same as + * dsfmt_gv_fill_array_close1_open2() except the distribution range. + * This function uses \b global variables. + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2() \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void dsfmt_gv_fill_array_open_open(double array[], int size) { + dsfmt_fill_array_open_open(&dsfmt_global_data, array, size); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. + * @param dsfmt dsfmt state vector. + * @param seed a 32-bit integer used as the seed. + */ +inline static void dsfmt_init_gen_rand(dsfmt_t *dsfmt, uint32_t seed) { + dsfmt_chk_init_gen_rand(dsfmt, seed, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array with a 32-bit + * integer seed. This function uses \b global variables. + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_init_gen_rand() + */ +inline static void dsfmt_gv_init_gen_rand(uint32_t seed) { + dsfmt_init_gen_rand(&dsfmt_global_data, seed); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * @param dsfmt dsfmt state vector + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + */ +inline static void dsfmt_init_by_array(dsfmt_t *dsfmt, uint32_t init_key[], + int key_length) { + dsfmt_chk_init_by_array(dsfmt, init_key, key_length, DSFMT_MEXP); +} + +/** + * This function initializes the internal state array, + * with an array of 32-bit integers used as the seeds. + * This function uses \b global variables. + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_init_by_array() + */ +inline static void dsfmt_gv_init_by_array(uint32_t init_key[], int key_length) { + dsfmt_init_by_array(&dsfmt_global_data, init_key, key_length); +} + +#if !defined(DSFMT_DO_NOT_USE_OLD_NAMES) +DSFMT_PRE_INLINE const char *get_idstring(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE int get_min_array_size(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_gen_rand(uint32_t seed) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void init_by_array(uint32_t init_key[], + int key_length) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close1_open2(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_close_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_close(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE double genrand_open_open(void) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_close(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_open_open(double array[], + int size) DSFMT_PST_INLINE; +DSFMT_PRE_INLINE void fill_array_close1_open2(double array[], + int size) DSFMT_PST_INLINE; + +/** + * This function is just the same as dsfmt_get_idstring(). + * @return id string. + * see also \sa dsfmt_get_idstring() + */ +inline static const char *get_idstring(void) { return dsfmt_get_idstring(); } + +/** + * This function is just the same as dsfmt_get_min_array_size(). + * @return minimum size of array used for fill_array functions. + * see also \sa dsfmt_get_min_array_size() + */ +inline static int get_min_array_size(void) { + return dsfmt_get_min_array_size(); +} + +/** + * This function is just the same as dsfmt_gv_init_gen_rand(). + * @param seed a 32-bit integer used as the seed. + * see also \sa dsfmt_gv_init_gen_rand(), \sa dsfmt_init_gen_rand(). + */ +inline static void init_gen_rand(uint32_t seed) { + dsfmt_gv_init_gen_rand(seed); +} + +/** + * This function is just the same as dsfmt_gv_init_by_array(). + * @param init_key the array of 32-bit integers, used as a seed. + * @param key_length the length of init_key. + * see also \sa dsfmt_gv_init_by_array(), \sa dsfmt_init_by_array(). + */ +inline static void init_by_array(uint32_t init_key[], int key_length) { + dsfmt_gv_init_by_array(init_key, key_length); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close1_open2(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close1_open2() \sa + * dsfmt_gv_genrand_close1_open2() + */ +inline static double genrand_close1_open2(void) { + return dsfmt_gv_genrand_close1_open2(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_close_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_close_open() \sa + * dsfmt_gv_genrand_close_open() + */ +inline static double genrand_close_open(void) { + return dsfmt_gv_genrand_close_open(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_close(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_close() \sa + * dsfmt_gv_genrand_open_close() + */ +inline static double genrand_open_close(void) { + return dsfmt_gv_genrand_open_close(); +} + +/** + * This function is just the same as dsfmt_gv_genrand_open_open(). + * @return double precision floating point number. + * see also \sa dsfmt_genrand_open_open() \sa + * dsfmt_gv_genrand_open_open() + */ +inline static double genrand_open_open(void) { + return dsfmt_gv_genrand_open_open(); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_close(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_close(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_close(double array[], int size) { + dsfmt_gv_fill_array_open_close(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_close_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close_open(double array[], int size) { + dsfmt_gv_fill_array_close_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_open_open(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_gv_fill_array_open_open(), \sa + * dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_open_open(double array[], int size) { + dsfmt_gv_fill_array_open_open(array, size); +} + +/** + * This function is juset the same as dsfmt_gv_fill_array_close1_open2(). + * @param array an array where pseudorandom numbers are filled + * by this function. + * @param size the number of pseudorandom numbers to be generated. + * see also \sa dsfmt_fill_array_close1_open2(), \sa + * dsfmt_gv_fill_array_close1_open2() + */ +inline static void fill_array_close1_open2(double array[], int size) { + dsfmt_gv_fill_array_close1_open2(array, size); +} +#endif /* DSFMT_DO_NOT_USE_OLD_NAMES */ + +#if defined(__cplusplus) +} +#endif + +#endif /* DSFMT_H */ + +union random_val_t { + double d; + uint64_t u64; +}; + +typedef struct s_dsfmt_state { + dsfmt_t *state; + int has_uint32; + uint32_t uinteger; + + double *buffered_uniforms; + int buffer_loc; +} dsfmt_state; + +static inline double dsfmt_next_buffer(dsfmt_state *state) { + if (state->buffer_loc < DSFMT_N64) { + double out = state->buffered_uniforms[state->buffer_loc]; + state->buffer_loc++; + return out; + } + dsfmt_fill_array_close1_open2(state->state, state->buffered_uniforms, + DSFMT_N64); + state->buffer_loc = 1; + return state->buffered_uniforms[0]; +} + +static inline double dsfmt_next_double(dsfmt_state *state) { + return dsfmt_next_buffer(state) - 1.0; +} + +static inline uint64_t dsfmt_next64(dsfmt_state *state) { + /* Discard bottom 16 bits */ + uint64_t out; + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + out = (rv.u64 >> 16) << 32; + rv.d = dsfmt_next_buffer(state); + out |= (rv.u64 >> 16) & 0xffffffff; + return out; +} + +static inline uint32_t dsfmt_next32(dsfmt_state *state) { + /* Discard bottom 16 bits */ + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + // uint64_t *out = (uint64_t *)&d; + return (uint32_t)((rv.u64 >> 16) & 0xffffffff); +} + +static inline uint64_t dsfmt_next_raw(dsfmt_state *state) { + union random_val_t rv; + rv.d = dsfmt_next_buffer(state); + return rv.u64; +} + +void dsfmt_jump(dsfmt_state *state);
\ No newline at end of file diff --git a/numpy/random/src/entropy/LICENSE.md b/numpy/random/src/entropy/LICENSE.md new file mode 100644 index 000000000..b7276aad7 --- /dev/null +++ b/numpy/random/src/entropy/LICENSE.md @@ -0,0 +1,25 @@ +# ENTROPY + +_Parts of this module were derived from PCG_ + + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/entropy/entropy.c b/numpy/random/src/entropy/entropy.c new file mode 100644 index 000000000..ead4bef83 --- /dev/null +++ b/numpy/random/src/entropy/entropy.c @@ -0,0 +1,174 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* This code provides a mechanism for getting external randomness for + * seeding purposes. Usually, it's just a wrapper around reading + * /dev/random. + * + * Alas, because not every system provides /dev/random, we need a fallback. + * We also need to try to test whether or not to use the fallback. + */ + +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include "entropy.h" +#ifdef _WIN32 +/* Windows */ +#include <sys/timeb.h> +#include <time.h> +#include <windows.h> + +#include <wincrypt.h> +#else +/* Unix */ +#include <sys/time.h> +#include <time.h> +#include <unistd.h> +#endif + +#ifndef IS_UNIX +#if !defined(_WIN32) && (defined(__unix__) || defined(__unix) || \ + (defined(__APPLE__) && defined(__MACH__))) +#define IS_UNIX 1 +#else +#define IS_UNIX 0 +#endif +#endif + +// If HAVE_DEV_RANDOM is set, we use that value, otherwise we guess +#ifndef HAVE_DEV_RANDOM +#define HAVE_DEV_RANDOM IS_UNIX +#endif + +#if HAVE_DEV_RANDOM +#include <fcntl.h> +#include <unistd.h> +#endif + +#if HAVE_DEV_RANDOM +/* entropy_getbytes(dest, size): + * Use /dev/random to get some external entropy for seeding purposes. + * + * Note: + * If reading /dev/random fails (which ought to never happen), it returns + * false, otherwise it returns true. If it fails, you could instead call + * fallback_entropy_getbytes which always succeeds. + */ + +bool entropy_getbytes(void *dest, size_t size) { + int fd = open("/dev/urandom", O_RDONLY); + if (fd < 0) + return false; + ssize_t sz = read(fd, dest, size); + if ((sz < 0) || ((size_t)sz < size)) + return false; + return close(fd) == 0; +} +#endif + +#ifdef _WIN32 +bool entropy_getbytes(void *dest, size_t size) { + HCRYPTPROV hCryptProv; + BOOL done; + + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) || + !hCryptProv) { + return true; + } + done = CryptGenRandom(hCryptProv, (DWORD)size, (unsigned char *)dest); + CryptReleaseContext(hCryptProv, 0); + if (!done) { + return false; + } + + return true; +} +#endif + +/* Thomas Wang 32/64 bits integer hash function */ +uint32_t entropy_hash_32(uint32_t key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +uint64_t entropy_hash_64(uint64_t key) { + key = (~key) + (key << 21); // key = (key << 21) - key - 1; + key = key ^ (key >> 24); + key = (key + (key << 3)) + (key << 8); // key * 265 + key = key ^ (key >> 14); + key = (key + (key << 2)) + (key << 4); // key * 21 + key = key ^ (key >> 28); + key = key + (key << 31); + return key; +} + +uint32_t entropy_randombytes(void) { + +#ifndef _WIN32 + struct timeval tv; + gettimeofday(&tv, NULL); + return entropy_hash_32(getpid()) ^ entropy_hash_32(tv.tv_sec) ^ + entropy_hash_32(tv.tv_usec) ^ entropy_hash_32(clock()); +#else + uint32_t out = 0; + int64_t counter; + struct _timeb tv; + _ftime_s(&tv); + out = entropy_hash_32(GetCurrentProcessId()) ^ + entropy_hash_32((uint32_t)tv.time) ^ entropy_hash_32(tv.millitm) ^ + entropy_hash_32(clock()); + if (QueryPerformanceCounter((LARGE_INTEGER *)&counter) != 0) + out ^= entropy_hash_32((uint32_t)(counter & 0xffffffff)); + return out; +#endif +} + +bool entropy_fallback_getbytes(void *dest, size_t size) { + int hashes = (int)size; + uint32_t *hash = malloc(hashes * sizeof(uint32_t)); + // uint32_t hash[hashes]; + int i; + for (i = 0; i < hashes; i++) { + hash[i] = entropy_randombytes(); + } + memcpy(dest, (void *)hash, size); + free(hash); + return true; +} + +void entropy_fill(void *dest, size_t size) { + bool success; + success = entropy_getbytes(dest, size); + if (!success) { + entropy_fallback_getbytes(dest, size); + } +} diff --git a/numpy/random/src/entropy/entropy.h b/numpy/random/src/entropy/entropy.h new file mode 100644 index 000000000..785603dd3 --- /dev/null +++ b/numpy/random/src/entropy/entropy.h @@ -0,0 +1,48 @@ +#ifndef _RANDOMDGEN__ENTROPY_H_ +#define _RANDOMDGEN__ENTROPY_H_ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#include <stddef.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +typedef int bool; +#define false 0 +#define true 1 +#else +#include <stdbool.h> +#include <stdint.h> +#endif +#else +#include <stdbool.h> +#include <stdint.h> +#endif + +extern void entropy_fill(void *dest, size_t size); + +extern bool entropy_getbytes(void *dest, size_t size); + +extern bool entropy_fallback_getbytes(void *dest, size_t size); + +#endif diff --git a/numpy/random/src/legacy/LICENSE.md b/numpy/random/src/legacy/LICENSE.md new file mode 100644 index 000000000..88b1791b2 --- /dev/null +++ b/numpy/random/src/legacy/LICENSE.md @@ -0,0 +1,30 @@ +Copyright (c) 2005-2017, NumPy Developers. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + +* Redistributions in binary form must reproduce the above + copyright notice, this list of conditions and the following + disclaimer in the documentation and/or other materials provided + with the distribution. + +* Neither the name of the NumPy Developers nor the names of any + contributors may be used to endorse or promote products derived + from this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/legacy/distributions-boxmuller.c b/numpy/random/src/legacy/distributions-boxmuller.c new file mode 100644 index 000000000..5d3ba27f8 --- /dev/null +++ b/numpy/random/src/legacy/distributions-boxmuller.c @@ -0,0 +1,214 @@ +#include "distributions-boxmuller.h" + +static NPY_INLINE double legacy_double(aug_brng_t *aug_state) { + return aug_state->basicrng->next_double(aug_state->basicrng->state); +} + +double legacy_gauss(aug_brng_t *aug_state) { + if (aug_state->has_gauss) { + const double temp = aug_state->gauss; + aug_state->has_gauss = false; + aug_state->gauss = 0.0; + return temp; + } else { + double f, x1, x2, r2; + + do { + x1 = 2.0 * legacy_double(aug_state) - 1.0; + x2 = 2.0 * legacy_double(aug_state) - 1.0; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. */ + f = sqrt(-2.0 * log(r2) / r2); + /* Keep for next call */ + aug_state->gauss = f * x1; + aug_state->has_gauss = true; + return f * x2; + } +} + +double legacy_standard_exponential(aug_brng_t *aug_state) { + /* We use -log(1-U) since U is [0, 1) */ + return -log(1.0 - legacy_double(aug_state)); +} + +double legacy_standard_gamma(aug_brng_t *aug_state, double shape) { + double b, c; + double U, V, X, Y; + + if (shape == 1.0) { + return legacy_standard_exponential(aug_state); + } + else if (shape == 0.0) { + return 0.0; + } else if (shape < 1.0) { + for (;;) { + U = legacy_double(aug_state); + V = legacy_standard_exponential(aug_state); + if (U <= 1.0 - shape) { + X = pow(U, 1. / shape); + if (X <= V) { + return X; + } + } else { + Y = -log((1 - U) / shape); + X = pow(1.0 - shape + shape * Y, 1. / shape); + if (X <= (V + Y)) { + return X; + } + } + } + } else { + b = shape - 1. / 3.; + c = 1. / sqrt(9 * b); + for (;;) { + do { + X = legacy_gauss(aug_state); + V = 1.0 + c * X; + } while (V <= 0.0); + + V = V * V * V; + U = legacy_double(aug_state); + if (U < 1.0 - 0.0331 * (X * X) * (X * X)) + return (b * V); + if (log(U) < 0.5 * X * X + b * (1. - V + log(V))) + return (b * V); + } + } +} + +double legacy_gamma(aug_brng_t *aug_state, double shape, double scale) { + return scale * legacy_standard_gamma(aug_state, shape); +} + +double legacy_pareto(aug_brng_t *aug_state, double a) { + return exp(legacy_standard_exponential(aug_state) / a) - 1; +} + +double legacy_weibull(aug_brng_t *aug_state, double a) { + if (a == 0.0) { + return 0.0; + } + return pow(legacy_standard_exponential(aug_state), 1. / a); +} + +double legacy_power(aug_brng_t *aug_state, double a) { + return pow(1 - exp(-legacy_standard_exponential(aug_state)), 1. / a); +} + +double legacy_chisquare(aug_brng_t *aug_state, double df) { + return 2.0 * legacy_standard_gamma(aug_state, df / 2.0); +} + +double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df, + double nonc) { + double out; + if (nonc == 0) { + return legacy_chisquare(aug_state, df); + } + if (1 < df) { + const double Chi2 = legacy_chisquare(aug_state, df - 1); + const double n = legacy_gauss(aug_state) + sqrt(nonc); + return Chi2 + n * n; + } else { + const long i = random_poisson(aug_state->basicrng, nonc / 2.0); + out = legacy_chisquare(aug_state, df + 2 * i); + /* Insert nan guard here to avoid changing the stream */ + if (npy_isnan(nonc)){ + return NPY_NAN; + } else { + return out; + } + } +} + +double legacy_noncentral_f(aug_brng_t *aug_state, double dfnum, double dfden, + double nonc) { + double t = legacy_noncentral_chisquare(aug_state, dfnum, nonc) * dfden; + return t / (legacy_chisquare(aug_state, dfden) * dfnum); +} + +double legacy_wald(aug_brng_t *aug_state, double mean, double scale) { + double U, X, Y; + double mu_2l; + + mu_2l = mean / (2 * scale); + Y = legacy_gauss(aug_state); + Y = mean * Y * Y; + X = mean + mu_2l * (Y - sqrt(4 * scale * Y + Y * Y)); + U = legacy_double(aug_state); + if (U <= mean / (mean + X)) { + return X; + } else { + return mean * mean / X; + } +} + +double legacy_normal(aug_brng_t *aug_state, double loc, double scale) { + return loc + scale * legacy_gauss(aug_state); +} + +double legacy_lognormal(aug_brng_t *aug_state, double mean, double sigma) { + return exp(legacy_normal(aug_state, mean, sigma)); +} + +double legacy_standard_t(aug_brng_t *aug_state, double df) { + double num, denom; + + num = legacy_gauss(aug_state); + denom = legacy_standard_gamma(aug_state, df / 2); + return sqrt(df / 2) * num / sqrt(denom); +} + +int64_t legacy_negative_binomial(aug_brng_t *aug_state, double n, double p) { + double Y = legacy_gamma(aug_state, n, (1 - p) / p); + return random_poisson(aug_state->basicrng, Y); +} + +double legacy_standard_cauchy(aug_brng_t *aug_state) { + return legacy_gauss(aug_state) / legacy_gauss(aug_state); +} + +double legacy_beta(aug_brng_t *aug_state, double a, double b) { + double Ga, Gb; + + if ((a <= 1.0) && (b <= 1.0)) { + double U, V, X, Y; + /* Use Johnk's algorithm */ + + while (1) { + U = legacy_double(aug_state); + V = legacy_double(aug_state); + X = pow(U, 1.0 / a); + Y = pow(V, 1.0 / b); + + if ((X + Y) <= 1.0) { + if (X + Y > 0) { + return X / (X + Y); + } else { + double logX = log(U) / a; + double logY = log(V) / b; + double logM = logX > logY ? logX : logY; + logX -= logM; + logY -= logM; + + return exp(logX - log(exp(logX) + exp(logY))); + } + } + } + } else { + Ga = legacy_standard_gamma(aug_state, a); + Gb = legacy_standard_gamma(aug_state, b); + return Ga / (Ga + Gb); + } +} + +double legacy_f(aug_brng_t *aug_state, double dfnum, double dfden) { + return ((legacy_chisquare(aug_state, dfnum) * dfden) / + (legacy_chisquare(aug_state, dfden) * dfnum)); +} + +double legacy_exponential(aug_brng_t *aug_state, double scale) { + return scale * legacy_standard_exponential(aug_state); +} diff --git a/numpy/random/src/legacy/distributions-boxmuller.h b/numpy/random/src/legacy/distributions-boxmuller.h new file mode 100644 index 000000000..445686e6c --- /dev/null +++ b/numpy/random/src/legacy/distributions-boxmuller.h @@ -0,0 +1,40 @@ +#ifndef _RANDOMDGEN__DISTRIBUTIONS_LEGACY_H_ +#define _RANDOMDGEN__DISTRIBUTIONS_LEGACY_H_ + + +#include "../distributions/distributions.h" + +typedef struct aug_brng { + brng_t *basicrng; + int has_gauss; + double gauss; +} aug_brng_t; + +extern double legacy_gauss(aug_brng_t *aug_state); +extern double legacy_standard_exponential(aug_brng_t *aug_state); +extern double legacy_pareto(aug_brng_t *aug_state, double a); +extern double legacy_weibull(aug_brng_t *aug_state, double a); +extern double legacy_power(aug_brng_t *aug_state, double a); +extern double legacy_gamma(aug_brng_t *aug_state, double shape, double scale); +extern double legacy_pareto(aug_brng_t *aug_state, double a); +extern double legacy_weibull(aug_brng_t *aug_state, double a); +extern double legacy_chisquare(aug_brng_t *aug_state, double df); +extern double legacy_noncentral_chisquare(aug_brng_t *aug_state, double df, + double nonc); + +extern double legacy_noncentral_f(aug_brng_t *aug_state, double dfnum, + double dfden, double nonc); +extern double legacy_wald(aug_brng_t *aug_state, double mean, double scale); +extern double legacy_lognormal(aug_brng_t *aug_state, double mean, + double sigma); +extern double legacy_standard_t(aug_brng_t *aug_state, double df); +extern int64_t legacy_negative_binomial(aug_brng_t *aug_state, double n, + double p); +extern double legacy_standard_cauchy(aug_brng_t *state); +extern double legacy_beta(aug_brng_t *aug_state, double a, double b); +extern double legacy_f(aug_brng_t *aug_state, double dfnum, double dfden); +extern double legacy_normal(aug_brng_t *aug_state, double loc, double scale); +extern double legacy_standard_gamma(aug_brng_t *aug_state, double shape); +extern double legacy_exponential(aug_brng_t *aug_state, double scale); + +#endif diff --git a/numpy/random/src/mt19937/LICENSE.md b/numpy/random/src/mt19937/LICENSE.md new file mode 100644 index 000000000..f65c3d46e --- /dev/null +++ b/numpy/random/src/mt19937/LICENSE.md @@ -0,0 +1,61 @@ +# MT19937 + +Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + +The rk_random and rk_seed functions algorithms and the original design of +the Mersenne Twister RNG: + + Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions + are met: + + 1. Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + 2. Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + 3. The names of its contributors may not be used to endorse or promote + products derived from this software without specific prior written + permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +Original algorithm for the implementation of rk_interval function from +Richard J. Wagner's implementation of the Mersenne Twister RNG, optimised by +Magnus Jonsson. + +Constants used in the rk_double implementation by Isaku Wada. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sublicense, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice shall be included +in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. +IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY +CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
\ No newline at end of file diff --git a/numpy/random/src/mt19937/mt19937-benchmark.c b/numpy/random/src/mt19937/mt19937-benchmark.c new file mode 100644 index 000000000..039f8030a --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-benchmark.c @@ -0,0 +1,31 @@ +/* + * cl mt19937-benchmark.c mt19937.c /Ox + * Measure-Command { .\mt19937-benchmark.exe } + * + * gcc mt19937-benchmark.c mt19937.c -O3 -o mt19937-benchmark + * time ./mt19937-benchmark + */ +#include "mt19937.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define Q 1000000000 + +int main() { + int i; + uint32_t seed = 0x0; + uint64_t sum = 0, count = 0; + mt19937_state state; + mt19937_seed(&state, seed); + clock_t begin = clock(); + for (i = 0; i < Q; i++) { + sum += mt19937_next64(&state); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(Q / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/mt19937/mt19937-jump.c b/numpy/random/src/mt19937/mt19937-jump.c new file mode 100644 index 000000000..46b28cf96 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-jump.c @@ -0,0 +1,224 @@ +#include "mt19937-jump.h" +#include "mt19937.h" + +/* 32-bits function */ +/* return the i-th coefficient of the polynomial pf */ +unsigned long get_coef(unsigned long *pf, unsigned int deg) { + if ((pf[deg >> 5] & (LSB << (deg & 0x1ful))) != 0) + return (1); + else + return (0); +} + +/* 32-bit function */ +/* set the coefficient of the polynomial pf with v */ +void set_coef(unsigned long *pf, unsigned int deg, unsigned long v) { + if (v != 0) + pf[deg >> 5] ^= (LSB << (deg & 0x1ful)); + else + ; +} + +void gray_code(unsigned long *h) { + unsigned int i, j = 1, l = 1, term = LL; + + h[0] = 0; + + for (i = 1; i <= QQ; i++) { + l = (l << 1); + term = (term >> 1); + for (; j < l; j++) + h[j] = h[l - j - 1] ^ term; + } +} + +void copy_state(mt19937_state *target_state, mt19937_state *state) { + int i; + + for (i = 0; i < N; i++) + target_state->key[i] = state->key[i]; + + target_state->pos = state->pos; +} + +/* next state generating function */ +void gen_next(mt19937_state *state) { + int num; + unsigned long y; + static unsigned long mag02[2] = {0x0ul, MATRIX_A}; + + num = state->pos; + if (num < N - M) { + y = (state->key[num] & UPPER_MASK) | (state->key[num + 1] & LOWER_MASK); + state->key[num] = state->key[num + M] ^ (y >> 1) ^ mag02[y % 2]; + state->pos++; + } else if (num < N - 1) { + y = (state->key[num] & UPPER_MASK) | (state->key[num + 1] & LOWER_MASK); + state->key[num] = state->key[num + (M - N)] ^ (y >> 1) ^ mag02[y % 2]; + state->pos++; + } else if (num == N - 1) { + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ mag02[y % 2]; + state->pos = 0; + } +} + +void add_state(mt19937_state *state1, mt19937_state *state2) { + int i, pt1 = state1->pos, pt2 = state2->pos; + + if (pt2 - pt1 >= 0) { + for (i = 0; i < N - pt2; i++) + state1->key[i + pt1] ^= state2->key[i + pt2]; + for (; i < N - pt1; i++) + state1->key[i + pt1] ^= state2->key[i + (pt2 - N)]; + for (; i < N; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + (pt2 - N)]; + } else { + for (i = 0; i < N - pt1; i++) + state1->key[i + pt1] ^= state2->key[i + pt2]; + for (; i < N - pt2; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + pt2]; + for (; i < N; i++) + state1->key[i + (pt1 - N)] ^= state2->key[i + (pt2 - N)]; + } +} + +/* +void gen_vec_h(mt19937_state *state, mt19937_state *vec_h, + unsigned long *h) { + int i; + unsigned long k, g; + mt19937_state v; + + gray_code(h); + + copy_state(&vec_h[0], state); + + for (i = 0; i < QQ; i++) + gen_next(&vec_h[0]); + + for (i = 1; i < LL; i++) { + copy_state(&v, state); + g = h[i] ^ h[i - 1]; + for (k = 1; k < g; k = (k << 1)) + gen_next(&v); + copy_state(&vec_h[h[i]], &vec_h[h[i - 1]]); + add_state(&vec_h[h[i]], &v); + } +} +*/ + +/* compute pf(ss) using Sliding window algorithm */ +/* +void calc_state(unsigned long *pf, mt19937_state *state, + mt19937_state *vec_h) { + mt19937_state *temp1; + int i = MEXP - 1, j, digit, skip = 0; + + temp1 = (mt19937_state *)calloc(1, sizeof(mt19937_state)); + + while (get_coef(pf, i) == 0) + i--; + + for (; i >= QQ; i--) { + if (get_coef(pf, i) != 0) { + for (j = 0; j < QQ + 1; j++) + gen_next(temp1); + digit = 0; + for (j = 0; j < QQ; j++) + digit = (digit << 1) ^ get_coef(pf, i - j - 1); + add_state(temp1, &vec_h[digit]); + i -= QQ; + } else + gen_next(temp1); + } + + for (; i > -1; i--) { + gen_next(temp1); + if (get_coef(pf, i) == 1) + add_state(temp1, state); + else + ; + } + + copy_state(state, temp1); + free(temp1); +} +*/ + +/* compute pf(ss) using standard Horner method */ +void horner1(unsigned long *pf, mt19937_state *state) { + int i = MEXP - 1; + mt19937_state *temp; + + temp = (mt19937_state *)calloc(1, sizeof(mt19937_state)); + + while (get_coef(pf, i) == 0) + i--; + + if (i > 0) { + copy_state(temp, state); + gen_next(temp); + i--; + for (; i > 0; i--) { + if (get_coef(pf, i) != 0) + add_state(temp, state); + else + ; + gen_next(temp); + } + if (get_coef(pf, 0) != 0) + add_state(temp, state); + else + ; + } else if (i == 0) + copy_state(temp, state); + else + ; + + copy_state(state, temp); + free(temp); +} + +void mt19937_jump_state(mt19937_state *state, const char *jump_str) { + unsigned long *pf; + int i; + + pf = (unsigned long *)calloc(P_SIZE, sizeof(unsigned long)); + + for (i = MEXP - 1; i > -1; i--) { + if (jump_str[i] == '1') + set_coef(pf, i, 1); + } + /* TODO: Should generate the next set and start from 0, but doesn't matter ?? + */ + if (state->pos >= N) { + state->pos = 0; + } + + horner1(pf, state); + + free(pf); +} +/* +void mt19937_jump(mt19937_state *state, const char *jump_str) +{ + unsigned long h[LL]; + mt19937_state vec_h[LL]; + unsigned long *pf; + int i; + + pf = (unsigned long *)calloc(P_SIZE, sizeof(unsigned long)); + + for (i = MEXP - 1; i > -1; i--) + { + if (jump_str[i] == '1') + set_coef(pf, i, 1); + } + + gen_vec_h(state, &vec_h, &h); + calc_state(pf, state, &vec_h); + + free(pf); +} +*/
\ No newline at end of file diff --git a/numpy/random/src/mt19937/mt19937-jump.h b/numpy/random/src/mt19937/mt19937-jump.h new file mode 100644 index 000000000..394c150a0 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-jump.h @@ -0,0 +1,15 @@ +#pragma once +#include "mt19937.h" +#include <stdlib.h> + +/* parameters for computing Jump */ +#define W_SIZE 32 /* size of unsigned long */ +#define MEXP 19937 +#define P_SIZE ((MEXP / W_SIZE) + 1) +#define LSB 0x00000001UL +#define QQ 7 +#define LL 128 /* LL = 2^(QQ) */ + +void mt19937_jump_state(mt19937_state *state, const char *jump_str); + +void set_coef(unsigned long *pf, unsigned int deg, unsigned long v);
\ No newline at end of file diff --git a/numpy/random/src/mt19937/mt19937-poly.h b/numpy/random/src/mt19937/mt19937-poly.h new file mode 100644 index 000000000..b03747881 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-poly.h @@ -0,0 +1,207 @@ +static const char * poly = +"0001000111110111011100100010101111000000010100100101000001110111100010101000110100101001011001010" +"1110101101100101011100101101011001110011100011110100001000001011100101100010100000010011101110011" +"0100001001111010000100100101001011100111101101001100000111001000011101100100010000001111110100010" +"0000111101000101000101101111001011000011001001001011010011001000001000011100100010110101111111101" +"0010001001100010011011101111101110111010111000010000011010110011111101100000100100101001010000001" +"1001111000011010011101001101011000111001110010110000011000110101111010110011011000001110110010001" +"1001101011011101000011001011111111100011001010111100000001111011111101000101000011000011111100101" +"0100001111101010101100000110100110010010101011011100110011000101100101011110010101110000101011100" +"0001010100010110100000111001100000011101011001101000001000101101010100010101100000100011110110011" +"0101100110111101010111100010100110100011111011100111000001110110010000000100000110101010111001111" +"0011110010000110101101010001110010100111111111100100101010010011101111011000010111101001110110110" +"1011101101101100110111000100101100111001011111110101001000011111010011000111110011100100001101111" +"1001010110110001000100001001000010000000001011011100101010010100011000110101001000010101100111101" +"0011110101100110111100000111001011011001100101111011000101001011011111110110100010001100101001100" +"1111110011111111110111011011100011000100110011011011011001101011100110010001111100001111100100001" +"1000100011001010100101010100111110001100111111011111100100011110011101101000110100101110010111111" +"1001010110000101001110010110001011011010101111111001110001100100011001000010111001011011000111100" +"1101001011110111111010011000110100001010000000101010101001111101111110101111110101110101010010100" +"1100100101010110011111001101100110001011000101010001000110011011111101111110001100000010110110101" +"1111110100001011101011101110111101100001111000011100000110110100100100100101011000111000100110001" +"0110110001001000111110101111000000100100010100100101101111100011010100111101110010000001011111111" +"1101010000011001010101111001111110001111100010100010100001011001110001010010100001011111110110111" +"1100100100001111000111110111000100010101010110100111100001011001101001111101001110010110110011010" +"1000010011000110000000110110110000111010010000111001100010100101010101111100010111000000011101110" +"1100011010110001101100110000001010001100111101101011100111110111000110010011011011001101001111100" +"1011111001100011010110101111100110111101011100000011000010001010001101001011000001111000101000100" +"0110001011001010110000001101100000011000011110010000101000011010011110001101111111010010101100100" +"1111010100000011011001111111011011111001101110101010110111110110101000100001011110111010100111100" +"0000001001111100111111111000100000100100010001011001100001111100100000001111011101100010011000111" +"0011110110100011011001110011100011011000010000000101101101001010111000010000010101111110000000100" +"1011010100001001000011001100011000000111100111100101010100000111000000110111011101011111100010101" +"0011001100110000010101111001000111001001010100011000110010011011101001001100101100000000111000111" +"0111111000010010010100000101010010000100101011111111111001100101101010011010100010111001011100011" +"1001001011010000110000111100010110110100000100110010000010010000001000110010101000110101101100100" +"0001100001100011110110010000100000100010011001010010110111100011011000101011001100001111110110110" +"0001100110010100011001101000100001110011011111101001101011110011011011111110111110101110010011001" +"1000000101100000101100100000100000001011000100100001100100101101010111101010111101010001001010110" +"0011111011001101001110110010100100000011001001111010001001100101110000000010111101000111111101010" +"0110101110101110001001110000111110100000101101100110010001111101111011001000101110111010110111110" +"0011001101011010001011000010000111111111101001011100110101011000000001111000101100011101011011100" +"1111101110000000000110001110011001101100111111010001110000111110100011000100001100110010000110111" +"1001011011001111011100000000011011000100000011000010010111000111101000011001001100011010001111000" +"0011110010100010001101011101010011001100000010101001001101111101000111001110110000000010111101001" +"1110110011101110111010011100101001010101100100011111100110001111011111110010100000011100110110001" +"1011100000101000010100011101000010111100101111101100110001010001010000101110000000110100010110011" +"1111110100101010011010100001100110110110011111110010000100001010011110010110001000000100000111000" +"0111001010011001000010111001100110100110110101111011110111001001000101010010010011000111110010101" +"1100110001100101001000010001101010011001110011001110001110010100010000000000000110111001010101000" +"0111111011011101000111011001011011000101110100010001111100101110000100001011111101111101010011001" +"0010001100011011101100010010101011001000001001010101100110001111001110011100110111111010110010001" +"1111111101111001001101101001001010011001110000101000110010111110010110111111000100101000101011010" +"0000101101101100000110101000101000010001111000100000111110011111111110010010001010001111011001100" +"0011110111000000111111000100001111101110100010101011001010110110011001010010001011100001010110101" +"0100000010101101000011001101110010000010110011000101100100000111111100011001110011010011001110000" +"1110011110000000001001001010100000111001010110001110011100011010010010001110010011001010111100000" +"1110000101101001011010001001010000111000010011010100001010110000101101110110011000011100111100001" +"1001000011010001110110111001100100001111110010110010011111000010100000001101110100000000101101000" +"0011000000100011000111110001000011100111110110000110101111101100011110100111111000000011011110110" +"1101011010111010010001001101000110110010000010101000000001100100100000001111011001001010110100011" +"1011000010101111010111000001001100111110000010110010011011110011111001000101111011010011010100001" +"0110011111100001011111101010010100110001001001001000100010101011011000011100111000110101110000001" +"1100001111100011110010000101011000010101111010001101010101100001100101100000100100000101011001100" +"0011001000101010101010100111000100100010101000111111101010000000101010101001000101010100100111001" +"1001100001010001100110111101010001111010011100000001001110100010010011110100001000011111100010001" +"0010001000100110101011001110100110101110110110100101111000110101101101001000001110011010110011001" +"0111111101011011101001111001011100001010110111000001100010110110100011010111011000111010100011000" +"1111010110001001010000110001000101101100010100000000100001111100000010111001000011000101010100001" +"0001101100011100010100101110010100000010011011010100000111110110000110101011011010010001110000111" +"0110101000110101110010011100010010100111001101110110010001101001101101010100001010001110111011011" +"1010011001010111101001011000100111001110011000000001101000001111001100001100000011001110100110011" +"0011000110001001010111111111110110111111000111100010010101110000101100101000001010001011010100010" +"1010010100010011101111100111010010010001110101011110110100001000001001000111001110010001001100100" +"1100100010001010011011110100000101101011101010110110100100010001110000111010111001111011111001011" +"0000000000011000100100100111001000101111000000110001011110101111110111100000000100101011000111011" +"1011010011101000001011001001110001111010000100101101010111001010001000100001000111011010000110111" +"1010110001001110001100001110011000101100000101100000000110101000000110101100100101110001100100100" +"0110000110101011100001010001010000011101111011111011011000100100101011110101111000001011110010110" +"0111011011100111101010110001111011010011111000010111110100001001010001011001000110111100000101011" +"0010111111010100000110111101001100000100001011101010100011010010000001101100100101001000100011000" +"0101010111111100100000111011101111100000011011111111010001100011001100101101011110101011101100001" +"0100010011101111111011000111111101001000101101111001111000101110010111001010101011010111000000101" +"0110010000010010101111100010111110000000011101001000011111001011111100111100100101100101111010110" +"1010101001110011111100111110100000111100100000111111000010100001111011111110110010001001000000000" +"1110100110010111100101111111001010001111001101100001011000111011100010100001000010100000011001000" +"0000111000110111001001100010111010100111111001111101100101000011001001110011100110101110001101110" +"1110000010110110010110000111001110110000011011100111000101100101000000001110011011001001111001111" +"0000101100001000000111100110110000110111111001101001111111010000001011110011011011100100110000110" +"1001011111101100100111111000000010001110111011010011011101001100000011001010000010101111111010110" +"0001000100101110101101100001001010100110010000110110100110011001000111011110110011001110111110101" +"0000011111011011001111010010101011000010011101001011100001010001111001000110000010000101010011111" +"0110011000001111101001110001101011111111001010010110100001101000000011101000101011101000110101111" +"0000101110011010010000110100000101100011000100101111100011001111011101001010100111001110100001101" +"0000110111011000000110011001101011110000101100110110000101100000110110100001001001110001110001001" +"1100110111111100101001100010010110011011110001000111111111001101111110010000011001011010111101001" +"1101111110101110110100101100110001101101001010111101101000000011111111100101000101110001000011001" +"1000111110111011010010101011110110110001010001001001100111111010011101111000000111011000011010011" +"0111010101001110010100101101000110000110001100010101001110101011010100000110110111111111110011110" +"0100011110100011001000110101111010000001011011110101001100111100010100101100010000010110011001111" +"0011011110001110010010100100011111110000110011011100010110110101001110011010101111011001010101011" +"1001001111001000001100100111000001000110110101100111000101011000000100001000100010011000001110011" +"0000111100000111001101011111010000010001100000010101101000111100001000010011110000001011001001100" +"0011011011111011100000111101001011101000010010001001111110010101111010110101101110110111010000101" +"1100011000000000110110100011010100100010001101010101101110110111111011010110011101011010110101011" +"1101000000010010011111000000101000110001000011100001101111010101100000100000100111111111100000000" +"0011100011100101110010111100010111110010101110101000011000111111001110111111000001101101011011111" +"1100110101001000011111001111000000001010001001010101101000001100111010101100010111001001111100000" +"1110101101110001011100011101101100001001001011100111100110011101111000100010010001111100001010010" +"1011001001010100101100010010000110010000101010111111001000011100000000101101110010001101110101001" +"1110000011100101010000011110000010001000001010110001010000100111001100110001111000100100011100110" +"1100010011110111001001100000100111001010000000000011100011111111101110010101111010100010000100001" +"0101101001010111111110000110110010100000001011110100010111110111010000001011110110111000000110010" +"0001100100111110001100010101000010011111100000100010000101110000111001101100100000011111111100010" +"1001101101001000001111000100100001010110111011110110001001010001110001001100011001001100000000101" +"1100011110101101011001100001010110001010000111100000011011011001000010101100010101110011001101110" +"0000101011010001010011111001011000010101010100110110111110101000111110001000010100000000100010100" +"1000111111000110110010001111000010101011101101111101011110101111100111111100111101000101000010011" +"0010111010100010011001000000010111100010000101001011001101100011100001001111010100100110101111111" +"1000010011110101001010011111111011101001110100001001100010000100001001100101101111011100100011001" +"1111010001011001111101011110101101000111110101001010011101010010010101001000000000011001100110001" +"0001000010101010101000010100111000001110000111001110001101111111000010101010111001011101001001011" +"0011001111011010101110101111110001001100100111010001011000010100000100000001001100000011000011101" +"1100000110000001011001110000101001010111101000110101000011000000111011100101010000111000010010101" +"1010100101100001011011011110110011000100100101010011111101000000100001001101000011000101010111101" +"1110111111100010111000111000010110111010010110000000000100101001000111101101100000000110111011001" +"0100000000100100011110111011101101101101010110001110100001100001001011000000111111110100011110011" +"0010000010000000010100110011110000000010000011111000111101011110000000000010101101001100000010010" +"1011001001101110110011100001100011101001101011110011010001011101000100011111001010100000011111111" +"1010101100000010001000110000110000101000110100110011100000110010110100011111010001000011100001001" +"1000101000010111111011100010111000111001010100110000000010011011101010101111000110001000110111011" +"1011100001100011010001101011010100110110011100000010111001011111110010100110100010001100000011100" +"0001011001011000101011010000001010011010001011000111000011000011110011111001111010001101011010010" +"0010010001001001101000101001011011101110001100010001010100010111111001100100000010001111100010111" +"0100001111001100101001011101010010110010100010001100011010100110000100011010111110001011011001000" +"1001001111011010010011101110100001111100000110101001010111110001101100110010111010111001011111010" +"1110111011111110000001110010000010011111000111011011000011000010011110011111111101100101111011100" +"0101101100000110101110000111111111111010110101010100111000011111011001100000100000101011000101110" +"1011010010100000000100100000010111101110111001000011111011111110100011010010000110001101111101100" +"1100010111001011011001011001010100100110100101001000111011011001100011001010010101111001100100110" +"1000110000111011100101110101101000011001010010100011000001111001110110101101010010110110001100100" +"0100001011101100111001010001111011010110010010110010110111110001001001111001111010010001010001101" +"1110100110101100011110100100110111000111010110011000100100110110001101111100111110100001000110000" +"1110011011001101100101100000001010100011101000010100111011111100011010000110000001011100010000101" +"0100101000010001110010001100010110011111111101111000011001110111011100110010010100100010001000010" +"0100001110010000011000110001101011101001110100100011011001000111010101110100110011010111001100001" +"0100001001101010010111110101110111000000010100111101011010101001000001001000001000101101111000000" +"0110000101110100001111001101110111011110010111101000100101110111010101001101100001110001101101101" +"0010101100100101000100100100110111000111000111100111000001100001000111101011000110111110001010000" +"0100110010001101100011010111000111010111000111110000110000101111101110010110111001011000111010001" +"1011000010010101010010011001000011010110111011010001001010100111001000010110110110101110000110000" +"1110110010011001011011000100011101001001000111011100100000000000100001101101000101000100000111001" +"0011100001100110101011011101110111101111000100100011100001010001011001110010101010001110101101110" +"1011001110111111111010101101000010111111011011011100011100101010001011011100011111011100101011000" +"1000110100101000011111010011110000000101101110010000101100001000100000000010010110000000000110011" +"1000000000001111001001000100000111001110111111001111101100001100111000101100011000100111111110011" +"1110010101011010111100110010110001010000101111111101001010100010001001111010111000010000010010001" +"1111111101100100001101011011100001010101000111110111111101011010011111111101000111011001011011000" +"0000101011100011101110110011101111011110011110010000011001111001110111011011111010011011001110111" +"0101100111110100000100010110010010101001010100010111000101111001011011001001110010100011101111110" +"1101011110010101101011010010011111110000011010011101000000010000111010100100111110111000001101010" +"0101100001111001111010101011110001001010000011010110010100011100100100111110100110000010011111001" +"0100010011001001010101110111111010011101101100000101011110111010011110001111110100111011110011010" +"0111001010110101010110000011001010000000101101010101001101011000011011010110101010101111101101100" +"1100100000111101010111011011011110011001100010010000010100101000111111101011100111010101011000111" +"1100110010101100010011111100000110011111101011100100001110001100001010101001001100010011001000100" +"1101101000101101110010000001101001001110101111000110111000011101111110100100110111000000101011110" +"0001100100001010101001101111001000001100000011010000100101100000001110100010010000110110101010111" +"1100010100000110011100101010111110010110111100000010110011011001011110111001010011011110010001110" +"1101110000001011101101011111101011111110110110000111110011101100110100010000100000110100010010110" +"0011000011000110101001110100111010110000100010110101110111100010110001000111100111001011011110010" +"0001001110101001101101011010111001001101100011101001011011001110011010001010110100111001111100101" +"1000111001010010000010111010101110001100110111111000011101001000001010010011101000111001100111110" +"1110100100100110010111111101010011101111011011111011011010011110100101100001011000001001001010010" +"1100001000000110110011011101010001011110010001001110110100100001101101001011101010001110111111010" +"1100011100101000011110111110110011111111100010110010110111010010001111101110011011010110000001000" +"0010110100010101110100001000010011100110001110001110010100010010010110011100100110010100001110011" +"1100001011010000001101011011011110100000001110100111001000101000001000001001000010000111010000100" +"0111100000101010110010111010010101100000001100110101001001000110001110111011110001010010010011000" +"1100001111101101100001111000101100110010001000111001101101011110100110100011101000011111011010101" +"0101000011111010010110001001100110110111000100100011011101000010001010110001111001111101110001111" +"0100100000010111010011111110000101001001011110100100010011101110011010100101100001010000001110100" +"0011111101111000100110011000011001100100001010110011111100111010100011110100010101011110011001000" +"0000110000100100001011101110111010001001011110010101111100001111101101111011011110001010000100010" +"1001100100100100110010010101100110000000100000000111110011100111101001010000010000000000101011100" +"0011101011100110000001100101010101011111111011010011110010011011001010011101010010100010001011010" +"1100010011101011010111110100001010100011000011001001011011101111110011001110010001100101011001101" +"0100010001111111100000101000001011010100011100111011010111001100110110001100110101000011010001010" +"1011100001001010011110001010100100001101110011101011100100101010001100110011110010001100100001000" +"0110001001110110010111101011101101010111001010011010101110000010100010000111011000010110011000001" +"1000110010100001110001100010010000001101111110000010010110100000000000001111110010001110111100001" +"0100111101000011101110010101011011000101011010111100111111001011110001110011110011011010010111101" +"1010111011101101000001110111001010011001110010100100100100001010001100101010111001110100000110111" +"1010000111000011101101100101101001100000011100100111100110010110011100101000111110111000110111110" +"1101100101011101100111011111111001111000011110111110101100000111000101100100110111000010100101000" +"0110000011011101111101111000110101011000010111010000111011000000100011101010100111001111101010111" +"0001110100001000100001011101001010001110100000101100001011101111100111101011111001111100101101111" +"0101100001110011111110110100110010000011011111101101110110000110110011100110111000111101000010111" +"0111101011100100000000011101111011000100001000111000000111011010101010110000111111101010110001111" +"0000110100111101111011001010101110000011001101001101000010011001101011111110111101010111010011100" +"0101010011001111101111001100101000101000111110111001011111100000001101111011000001001100111111111" +"1010111101000001111011110010001001001110100111110010000011110000011000000101001100011110110011001" +"1010101001000010001010110000010011110101011110010111010001010111101100001001100011101001111101001" +"0110110100111001110011100011111010010010100010111000001100001011010010000100100110101010111001001" +"0110000101011011011100110111111001010000001001011010101010010001011010111100111010101101000101101" +"0100100001011101110111111001111111110110111011000101010000010000011111001000100101100100100110110" +"1100000111110010110011010100000100011111110001110010110001000001001111001101110110110101101010111" +"0000100111101100010001110010110111100011100101100011"; diff --git a/numpy/random/src/mt19937/mt19937-test-data-gen.c b/numpy/random/src/mt19937/mt19937-test-data-gen.c new file mode 100644 index 000000000..4f4ec1d64 --- /dev/null +++ b/numpy/random/src/mt19937/mt19937-test-data-gen.c @@ -0,0 +1,59 @@ +/* + * Generate testing csv files + * + * cl mt19937-test-data-gen.c randomkit.c + * -IC:\Anaconda\Lib\site-packages\numpy\core\include -IC:\Anaconda\include + * Advapi32.lib Kernel32.lib C:\Anaconda\libs\python36.lib -DRK_NO_WINCRYPT=1 + * + */ +#include "randomkit.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint32_t seed = 0xDEADBEAF; + int i; + rk_state state; + rk_seed(seed, &state); + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = (uint64_t)rk_random(&state); + } + + FILE *fp; + fp = fopen("mt19937-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx32 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = 0; + rk_seed(seed, &state); + for (i = 0; i < N; i++) { + store[i] = (uint64_t)rk_random(&state); + } + fp = fopen("mt19937-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx32 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/mt19937/mt19937.c b/numpy/random/src/mt19937/mt19937.c new file mode 100644 index 000000000..e5ca9e0cf --- /dev/null +++ b/numpy/random/src/mt19937/mt19937.c @@ -0,0 +1,107 @@ +#include "mt19937.h" +#include "mt19937-jump.h" +#include "mt19937-poly.h" + +void mt19937_seed(mt19937_state *state, uint32_t seed) { + int pos; + seed &= 0xffffffffUL; + + /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ + for (pos = 0; pos < RK_STATE_LEN; pos++) { + state->key[pos] = seed; + seed = (1812433253UL * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffUL; + } + state->pos = RK_STATE_LEN; +} + +/* initializes mt[RK_STATE_LEN] with a seed */ +static void init_genrand(mt19937_state *state, uint32_t s) { + int mti; + uint32_t *mt = state->key; + + mt[0] = s & 0xffffffffUL; + for (mti = 1; mti < RK_STATE_LEN; mti++) { + /* + * See Knuth TAOCP Vol2. 3rd Ed. P.106 for multiplier. + * In the previous versions, MSBs of the seed affect + * only MSBs of the array mt[]. + * 2002/01/09 modified by Makoto Matsumoto + */ + mt[mti] = (1812433253UL * (mt[mti - 1] ^ (mt[mti - 1] >> 30)) + mti); + /* for > 32 bit machines */ + mt[mti] &= 0xffffffffUL; + } + state->pos = mti; + return; +} + +/* + * initialize by an array with array-length + * init_key is the array for initializing keys + * key_length is its length + */ +void mt19937_init_by_array(mt19937_state *state, uint32_t *init_key, + int key_length) { + /* was signed in the original code. RDH 12/16/2002 */ + int i = 1; + int j = 0; + uint32_t *mt = state->key; + int k; + + init_genrand(state, 19650218UL); + k = (RK_STATE_LEN > key_length ? RK_STATE_LEN : key_length); + for (; k; k--) { + /* non linear */ + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1664525UL)) + + init_key[j] + j; + /* for > 32 bit machines */ + mt[i] &= 0xffffffffUL; + i++; + j++; + if (i >= RK_STATE_LEN) { + mt[0] = mt[RK_STATE_LEN - 1]; + i = 1; + } + if (j >= key_length) { + j = 0; + } + } + for (k = RK_STATE_LEN - 1; k; k--) { + mt[i] = (mt[i] ^ ((mt[i - 1] ^ (mt[i - 1] >> 30)) * 1566083941UL)) - + i; /* non linear */ + mt[i] &= 0xffffffffUL; /* for WORDSIZE > 32 machines */ + i++; + if (i >= RK_STATE_LEN) { + mt[0] = mt[RK_STATE_LEN - 1]; + i = 1; + } + } + + mt[0] = 0x80000000UL; /* MSB is 1; assuring non-zero initial array */ +} + +void mt19937_gen(mt19937_state *state) { + uint32_t y; + int i; + + for (i = 0; i < N - M; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + M] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + for (; i < N - 1; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + (M - N)] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + + state->pos = 0; +} + +extern inline uint64_t mt19937_next64(mt19937_state *state); + +extern inline uint32_t mt19937_next32(mt19937_state *state); + +extern inline double mt19937_next_double(mt19937_state *state); + +void mt19937_jump(mt19937_state *state) { mt19937_jump_state(state, poly); } diff --git a/numpy/random/src/mt19937/mt19937.h b/numpy/random/src/mt19937/mt19937.h new file mode 100644 index 000000000..8105329ec --- /dev/null +++ b/numpy/random/src/mt19937/mt19937.h @@ -0,0 +1,69 @@ +#pragma once +#include <math.h> +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/stdint.h" +#else +#include <stdint.h> +#endif +#else +#include <stdint.h> +#endif + +#ifdef _WIN32 +#define inline __forceinline +#endif + +#define RK_STATE_LEN 624 + +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL +#define UPPER_MASK 0x80000000UL +#define LOWER_MASK 0x7fffffffUL + +typedef struct s_mt19937_state { + uint32_t key[RK_STATE_LEN]; + int pos; +} mt19937_state; + +extern void mt19937_seed(mt19937_state *state, uint32_t seed); + +extern void mt19937_gen(mt19937_state *state); + +/* Slightly optimized reference implementation of the Mersenne Twister */ +static inline uint32_t mt19937_next(mt19937_state *state) { + uint32_t y; + + if (state->pos == RK_STATE_LEN) { + // Move to function to help inlining + mt19937_gen(state); + } + y = state->key[state->pos++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +extern void mt19937_init_by_array(mt19937_state *state, uint32_t *init_key, + int key_length); + +static inline uint64_t mt19937_next64(mt19937_state *state) { + return (uint64_t)mt19937_next(state) << 32 | mt19937_next(state); +} + +static inline uint32_t mt19937_next32(mt19937_state *state) { + return mt19937_next(state); +} + +static inline double mt19937_next_double(mt19937_state *state) { + int32_t a = mt19937_next(state) >> 5, b = mt19937_next(state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void mt19937_jump(mt19937_state *state); diff --git a/numpy/random/src/mt19937/randomkit.c b/numpy/random/src/mt19937/randomkit.c new file mode 100644 index 000000000..f8ed4b49e --- /dev/null +++ b/numpy/random/src/mt19937/randomkit.c @@ -0,0 +1,578 @@ +/* Random kit 1.3 */ + +/* + * Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + * + * The rk_random and rk_seed functions algorithms and the original design of + * the Mersenne Twister RNG: + * + * Copyright (C) 1997 - 2002, Makoto Matsumoto and Takuji Nishimura, + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * 3. The names of its contributors may not be used to endorse or promote + * products derived from this software without specific prior written + * permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + * OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + * LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + * NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + * SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Original algorithm for the implementation of rk_interval function from + * Richard J. Wagner's implementation of the Mersenne Twister RNG, optimised by + * Magnus Jonsson. + * + * Constants used in the rk_double implementation by Isaku Wada. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* static char const rcsid[] = + "@(#) $Jeannot: randomkit.c,v 1.28 2005/07/21 22:14:09 js Exp $"; */ + +#ifdef _WIN32 +/* + * Windows + * XXX: we have to use this ugly defined(__GNUC__) because it is not easy to + * detect the compiler used in distutils itself + */ +#if (defined(__GNUC__) && defined(NPY_NEEDS_MINGW_TIME_WORKAROUND)) + +/* + * FIXME: ideally, we should set this to the real version of MSVCRT. We need + * something higher than 0x601 to enable _ftime64 and co + */ +#define __MSVCRT_VERSION__ 0x0700 +#include <sys/timeb.h> +#include <time.h> + +/* + * mingw msvcr lib import wrongly export _ftime, which does not exist in the + * actual msvc runtime for version >= 8; we make it an alias to _ftime64, which + * is available in those versions of the runtime + */ +#define _FTIME(x) _ftime64((x)) +#else +#include <sys/timeb.h> +#include <time.h> + +#define _FTIME(x) _ftime((x)) +#endif + +#ifndef RK_NO_WINCRYPT +/* Windows crypto */ +#ifndef _WIN32_WINNT +#define _WIN32_WINNT 0x0400 +#endif +#include <wincrypt.h> +#include <windows.h> + +#endif + +/* + * Do not move this include. randomkit.h must be included + * after windows timeb.h is included. + */ +#include "randomkit.h" + +#else +/* Unix */ +#include "randomkit.h" +#include <sys/time.h> +#include <time.h> +#include <unistd.h> + +#endif + +#include <assert.h> +#include <errno.h> +#include <limits.h> +#include <math.h> +#include <stddef.h> +#include <stdio.h> +#include <stdlib.h> + +#ifndef RK_DEV_URANDOM +#define RK_DEV_URANDOM "/dev/urandom" +#endif + +#ifndef RK_DEV_RANDOM +#define RK_DEV_RANDOM "/dev/random" +#endif + +char *rk_strerror[RK_ERR_MAX] = {"no error", "random device unvavailable"}; + +/* static functions */ +static unsigned long rk_hash(unsigned long key); + +void rk_seed(unsigned long seed, rk_state *state) { + int pos; + seed &= 0xffffffffUL; + + /* Knuth's PRNG as used in the Mersenne Twister reference implementation */ + for (pos = 0; pos < RK_STATE_LEN; pos++) { + state->key[pos] = seed; + seed = (1812433253UL * (seed ^ (seed >> 30)) + pos + 1) & 0xffffffffUL; + } + state->pos = RK_STATE_LEN; + state->gauss = 0; + state->has_gauss = 0; + state->has_binomial = 0; +} + +/* Thomas Wang 32 bits integer hash function */ +unsigned long rk_hash(unsigned long key) { + key += ~(key << 15); + key ^= (key >> 10); + key += (key << 3); + key ^= (key >> 6); + key += ~(key << 11); + key ^= (key >> 16); + return key; +} + +rk_error rk_randomseed(rk_state *state) { +#ifndef _WIN32 + struct timeval tv; +#else + struct _timeb tv; +#endif + int i; + + if (rk_devfill(state->key, sizeof(state->key), 0) == RK_NOERR) { + /* ensures non-zero key */ + state->key[0] |= 0x80000000UL; + state->pos = RK_STATE_LEN; + state->gauss = 0; + state->has_gauss = 0; + state->has_binomial = 0; + + for (i = 0; i < 624; i++) { + state->key[i] &= 0xffffffffUL; + } + return RK_NOERR; + } + +#ifndef _WIN32 + gettimeofday(&tv, NULL); + rk_seed(rk_hash(getpid()) ^ rk_hash(tv.tv_sec) ^ rk_hash(tv.tv_usec) ^ + rk_hash(clock()), + state); +#else + _FTIME(&tv); + rk_seed(rk_hash(tv.time) ^ rk_hash(tv.millitm) ^ rk_hash(clock()), state); +#endif + + return RK_ENODEV; +} + +/* Magic Mersenne Twister constants */ +#define N 624 +#define M 397 +#define MATRIX_A 0x9908b0dfUL +#define UPPER_MASK 0x80000000UL +#define LOWER_MASK 0x7fffffffUL + +/* + * Slightly optimised reference implementation of the Mersenne Twister + * Note that regardless of the precision of long, only 32 bit random + * integers are produced + */ +unsigned long rk_random(rk_state *state) { + unsigned long y; + + if (state->pos == RK_STATE_LEN) { + int i; + + for (i = 0; i < N - M; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = state->key[i + M] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + for (; i < N - 1; i++) { + y = (state->key[i] & UPPER_MASK) | (state->key[i + 1] & LOWER_MASK); + state->key[i] = + state->key[i + (M - N)] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + } + y = (state->key[N - 1] & UPPER_MASK) | (state->key[0] & LOWER_MASK); + state->key[N - 1] = state->key[M - 1] ^ (y >> 1) ^ (-(y & 1) & MATRIX_A); + + state->pos = 0; + } + y = state->key[state->pos++]; + + /* Tempering */ + y ^= (y >> 11); + y ^= (y << 7) & 0x9d2c5680UL; + y ^= (y << 15) & 0xefc60000UL; + y ^= (y >> 18); + + return y; +} + +/* + * Returns an unsigned 64 bit random integer. + */ +NPY_INLINE static npy_uint64 rk_uint64(rk_state *state) { + npy_uint64 upper = (npy_uint64)rk_random(state) << 32; + npy_uint64 lower = (npy_uint64)rk_random(state); + return upper | lower; +} + +/* + * Returns an unsigned 32 bit random integer. + */ +NPY_INLINE static npy_uint32 rk_uint32(rk_state *state) { + return (npy_uint32)rk_random(state); +} + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint64(npy_uint64 off, npy_uint64 rng, npy_intp cnt, + npy_uint64 *out, rk_state *state) { + npy_uint64 val, mask = rng; + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + mask |= mask >> 32; + + for (i = 0; i < cnt; i++) { + if (rng <= 0xffffffffUL) { + while ((val = (rk_uint32(state) & mask)) > rng) + ; + } else { + while ((val = (rk_uint64(state) & mask)) > rng) + ; + } + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint32(npy_uint32 off, npy_uint32 rng, npy_intp cnt, + npy_uint32 *out, rk_state *state) { + npy_uint32 val, mask = rng; + npy_intp i; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; + + for (i = 0; i < cnt; i++) { + while ((val = (rk_uint32(state) & mask)) > rng) + ; + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint16(npy_uint16 off, npy_uint16 rng, npy_intp cnt, + npy_uint16 *out, rk_state *state) { + npy_uint16 val, mask = rng; + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + + for (i = 0; i < cnt; i++) { + do { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 1; + } else { + buf >>= 16; + bcnt--; + } + val = (npy_uint16)buf & mask; + } while (val > rng); + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +void rk_random_uint8(npy_uint8 off, npy_uint8 rng, npy_intp cnt, npy_uint8 *out, + rk_state *state) { + npy_uint8 val, mask = rng; + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + + for (i = 0; i < cnt; i++) { + do { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 3; + } else { + buf >>= 8; + bcnt--; + } + val = (npy_uint8)buf & mask; + } while (val > rng); + out[i] = off + val; + } +} + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. + */ +void rk_random_bool(npy_bool off, npy_bool rng, npy_intp cnt, npy_bool *out, + rk_state *state) { + npy_intp i; + npy_uint32 buf; + int bcnt = 0; + + if (rng == 0) { + for (i = 0; i < cnt; i++) { + out[i] = off; + } + return; + } + + /* If we reach here rng and mask are one and off is zero */ + assert(rng == 1 && off == 0); + for (i = 0; i < cnt; i++) { + if (!bcnt) { + buf = rk_uint32(state); + bcnt = 31; + } else { + buf >>= 1; + bcnt--; + } + out[i] = (buf & 0x00000001) != 0; + } +} + +long rk_long(rk_state *state) { return rk_ulong(state) >> 1; } + +unsigned long rk_ulong(rk_state *state) { +#if ULONG_MAX <= 0xffffffffUL + return rk_random(state); +#else + return (rk_random(state) << 32) | (rk_random(state)); +#endif +} + +unsigned long rk_interval(unsigned long max, rk_state *state) { + unsigned long mask = max, value; + + if (max == 0) { + return 0; + } + /* Smallest bit mask >= max */ + mask |= mask >> 1; + mask |= mask >> 2; + mask |= mask >> 4; + mask |= mask >> 8; + mask |= mask >> 16; +#if ULONG_MAX > 0xffffffffUL + mask |= mask >> 32; +#endif + + /* Search a random value in [0..mask] <= max */ +#if ULONG_MAX > 0xffffffffUL + if (max <= 0xffffffffUL) { + while ((value = (rk_random(state) & mask)) > max) + ; + } else { + while ((value = (rk_ulong(state) & mask)) > max) + ; + } +#else + while ((value = (rk_ulong(state) & mask)) > max) + ; +#endif + return value; +} + +double rk_double(rk_state *state) { + /* shifts : 67108864 = 0x4000000, 9007199254740992 = 0x20000000000000 */ + long a = rk_random(state) >> 5, b = rk_random(state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void rk_fill(void *buffer, size_t size, rk_state *state) { + unsigned long r; + unsigned char *buf = buffer; + + for (; size >= 4; size -= 4) { + r = rk_random(state); + *(buf++) = r & 0xFF; + *(buf++) = (r >> 8) & 0xFF; + *(buf++) = (r >> 16) & 0xFF; + *(buf++) = (r >> 24) & 0xFF; + } + + if (!size) { + return; + } + r = rk_random(state); + for (; size; r >>= 8, size--) { + *(buf++) = (unsigned char)(r & 0xFF); + } +} + +rk_error rk_devfill(void *buffer, size_t size, int strong) { +#ifndef _WIN32 + FILE *rfile; + int done; + + if (strong) { + rfile = fopen(RK_DEV_RANDOM, "rb"); + } else { + rfile = fopen(RK_DEV_URANDOM, "rb"); + } + if (rfile == NULL) { + return RK_ENODEV; + } + done = fread(buffer, size, 1, rfile); + fclose(rfile); + if (done) { + return RK_NOERR; + } +#else + +#ifndef RK_NO_WINCRYPT + HCRYPTPROV hCryptProv; + BOOL done; + + if (!CryptAcquireContext(&hCryptProv, NULL, NULL, PROV_RSA_FULL, + CRYPT_VERIFYCONTEXT) || + !hCryptProv) { + return RK_ENODEV; + } + done = CryptGenRandom(hCryptProv, size, (unsigned char *)buffer); + CryptReleaseContext(hCryptProv, 0); + if (done) { + return RK_NOERR; + } +#endif + +#endif + return RK_ENODEV; +} + +rk_error rk_altfill(void *buffer, size_t size, int strong, rk_state *state) { + rk_error err; + + err = rk_devfill(buffer, size, strong); + if (err) { + rk_fill(buffer, size, state); + } + return err; +} + +double rk_gauss(rk_state *state) { + if (state->has_gauss) { + const double tmp = state->gauss; + state->gauss = 0; + state->has_gauss = 0; + return tmp; + } else { + double f, x1, x2, r2; + + do { + x1 = 2.0 * rk_double(state) - 1.0; + x2 = 2.0 * rk_double(state) - 1.0; + r2 = x1 * x1 + x2 * x2; + } while (r2 >= 1.0 || r2 == 0.0); + + /* Polar method, a more efficient version of the Box-Muller approach. */ + f = sqrt(-2.0 * log(r2) / r2); + /* Keep for next call */ + state->gauss = f * x1; + state->has_gauss = 1; + return f * x2; + } +} diff --git a/numpy/random/src/mt19937/randomkit.h b/numpy/random/src/mt19937/randomkit.h new file mode 100644 index 000000000..abb082cb2 --- /dev/null +++ b/numpy/random/src/mt19937/randomkit.h @@ -0,0 +1,223 @@ +/* Random kit 1.3 */ + +/* + * Copyright (c) 2003-2005, Jean-Sebastien Roy (js@jeannot.org) + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sublicense, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice shall be included + * in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. + * IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY + * CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + */ + +/* @(#) $Jeannot: randomkit.h,v 1.24 2005/07/21 22:14:09 js Exp $ */ + +/* + * Typical use: + * + * { + * rk_state state; + * unsigned long seed = 1, random_value; + * + * rk_seed(seed, &state); // Initialize the RNG + * ... + * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] + * } + * + * Instead of rk_seed, you can use rk_randomseed which will get a random seed + * from /dev/urandom (or the clock, if /dev/urandom is unavailable): + * + * { + * rk_state state; + * unsigned long random_value; + * + * rk_randomseed(&state); // Initialize the RNG with a random seed + * ... + * random_value = rk_random(&state); // Generate random values in [0..RK_MAX] + * } + */ + +/* + * Useful macro: + * RK_DEV_RANDOM: the device used for random seeding. + * defaults to "/dev/urandom" + */ + +#ifndef _RANDOMKIT_ +#define _RANDOMKIT_ + +#include <numpy/npy_common.h> +#include <stddef.h> + +#define RK_STATE_LEN 624 + +typedef struct rk_state_ { + unsigned long key[RK_STATE_LEN]; + int pos; + int has_gauss; /* !=0: gauss contains a gaussian deviate */ + double gauss; + + /* The rk_state structure has been extended to store the following + * information for the binomial generator. If the input values of n or p + * are different than nsave and psave, then the other parameters will be + * recomputed. RTK 2005-09-02 */ + + int has_binomial; /* !=0: following parameters initialized for + binomial */ + double psave; + long nsave; + double r; + double q; + double fm; + long m; + double p1; + double xm; + double xl; + double xr; + double c; + double laml; + double lamr; + double p2; + double p3; + double p4; + +} rk_state; + +typedef enum { + RK_NOERR = 0, /* no error */ + RK_ENODEV = 1, /* no RK_DEV_RANDOM device */ + RK_ERR_MAX = 2 +} rk_error; + +/* error strings */ +extern char *rk_strerror[RK_ERR_MAX]; + +/* Maximum generated random value */ +#define RK_MAX 0xFFFFFFFFUL + +#ifdef __cplusplus +extern "C" { +#endif + +/* + * Initialize the RNG state using the given seed. + */ +extern void rk_seed(unsigned long seed, rk_state *state); + +/* + * Initialize the RNG state using a random seed. + * Uses /dev/random or, when unavailable, the clock (see randomkit.c). + * Returns RK_NOERR when no errors occurs. + * Returns RK_ENODEV when the use of RK_DEV_RANDOM failed (for example because + * there is no such device). In this case, the RNG was initialized using the + * clock. + */ +extern rk_error rk_randomseed(rk_state *state); + +/* + * Returns a random unsigned long between 0 and RK_MAX inclusive + */ +extern unsigned long rk_random(rk_state *state); + +/* + * Returns a random long between 0 and LONG_MAX inclusive + */ +extern long rk_long(rk_state *state); + +/* + * Returns a random unsigned long between 0 and ULONG_MAX inclusive + */ +extern unsigned long rk_ulong(rk_state *state); + +/* + * Returns a random unsigned long between 0 and max inclusive. + */ +extern unsigned long rk_interval(unsigned long max, rk_state *state); + +/* + * Fills an array with cnt random npy_uint64 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint64(npy_uint64 off, npy_uint64 rng, npy_intp cnt, + npy_uint64 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint32 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint32(npy_uint32 off, npy_uint32 rng, npy_intp cnt, + npy_uint32 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint16 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint16(npy_uint16 off, npy_uint16 rng, npy_intp cnt, + npy_uint16 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_uint8 between off and off + rng + * inclusive. The numbers wrap if rng is sufficiently large. + */ +extern void rk_random_uint8(npy_uint8 off, npy_uint8 rng, npy_intp cnt, + npy_uint8 *out, rk_state *state); + +/* + * Fills an array with cnt random npy_bool between off and off + rng + * inclusive. It is assumed tha npy_bool as the same size as npy_uint8. + */ +extern void rk_random_bool(npy_bool off, npy_bool rng, npy_intp cnt, + npy_bool *out, rk_state *state); + +/* + * Returns a random double between 0.0 and 1.0, 1.0 excluded. + */ +extern double rk_double(rk_state *state); + +/* + * fill the buffer with size random bytes + */ +extern void rk_fill(void *buffer, size_t size, rk_state *state); + +/* + * fill the buffer with randombytes from the random device + * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is + * On Unix, if strong is defined, RK_DEV_RANDOM is used. If not, RK_DEV_URANDOM + * is used instead. This parameter has no effect on Windows. + * Warning: on most unixes RK_DEV_RANDOM will wait for enough entropy to answer + * which can take a very long time on quiet systems. + */ +extern rk_error rk_devfill(void *buffer, size_t size, int strong); + +/* + * fill the buffer using rk_devfill if the random device is available and using + * rk_fill if it is not + * parameters have the same meaning as rk_fill and rk_devfill + * Returns RK_ENODEV if the device is unavailable, or RK_NOERR if it is + */ +extern rk_error rk_altfill(void *buffer, size_t size, int strong, + rk_state *state); + +/* + * return a random gaussian deviate with variance unity and zero mean. + */ +extern double rk_gauss(rk_state *state); + +#ifdef __cplusplus +} +#endif + +#endif /* _RANDOMKIT_ */ diff --git a/numpy/random/src/pcg32/LICENSE.md b/numpy/random/src/pcg32/LICENSE.md new file mode 100644 index 000000000..3db2ac2e8 --- /dev/null +++ b/numpy/random/src/pcg32/LICENSE.md @@ -0,0 +1,22 @@ +# PCG32 + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/pcg32/pcg-advance-64.c b/numpy/random/src/pcg32/pcg-advance-64.c new file mode 100644 index 000000000..8210e7565 --- /dev/null +++ b/numpy/random/src/pcg32/pcg-advance-64.c @@ -0,0 +1,62 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Repetative C code is derived using C preprocessor metaprogramming + * techniques. + */ + +#include "pcg_variants.h" + +/* Multi-step advance functions (jump-ahead, jump-back) + * + * The method used here is based on Brown, "Random Number Generation + * with Arbitrary Stride,", Transactions of the American Nuclear + * Society (Nov. 1994). The algorithm is very similar to fast + * exponentiation. + * + * Even though delta is an unsigned integer, we can pass a + * signed integer to go backwards, it just goes "the long way round". + */ + +uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, uint64_t cur_mult, + uint64_t cur_plus) +{ + uint64_t acc_mult = 1u; + uint64_t acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + diff --git a/numpy/random/src/pcg32/pcg32-test-data-gen.c b/numpy/random/src/pcg32/pcg32-test-data-gen.c new file mode 100644 index 000000000..cccaf84b9 --- /dev/null +++ b/numpy/random/src/pcg32/pcg32-test-data-gen.c @@ -0,0 +1,59 @@ +/* + * Generate testing csv files + * + * + * gcc pcg32-test-data-gen.c pcg32.orig.c ../splitmix64/splitmix64.c -o + * pgc64-test-data-gen + */ + +#include "pcg_variants.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + pcg32_random_t rng; + uint64_t inc, seed = 0xDEADBEAF; + inc = 0; + int i; + uint64_t store[N]; + pcg32_srandom_r(&rng, seed, inc); + for (i = 0; i < N; i++) { + store[i] = pcg32_random_r(&rng); + } + + FILE *fp; + fp = fopen("pcg32-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = 0; + pcg32_srandom_r(&rng, seed, inc); + for (i = 0; i < N; i++) { + store[i] = pcg32_random_r(&rng); + } + fp = fopen("pcg32-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/pcg32/pcg32.c b/numpy/random/src/pcg32/pcg32.c new file mode 100644 index 000000000..5fbf6759f --- /dev/null +++ b/numpy/random/src/pcg32/pcg32.c @@ -0,0 +1,30 @@ +#include "pcg32.h" + +extern inline uint64_t pcg32_next64(pcg32_state *state); +extern inline uint32_t pcg32_next32(pcg32_state *state); +extern inline double pcg32_next_double(pcg32_state *state); + +uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, uint64_t cur_mult, + uint64_t cur_plus) { + uint64_t acc_mult, acc_plus; + acc_mult = 1u; + acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + +extern void pcg32_advance_state(pcg32_state *state, uint64_t step) { + pcg32_advance_r(state->pcg_state, step); +} + +extern void pcg32_set_seed(pcg32_state *state, uint64_t seed, uint64_t inc) { + pcg32_srandom_r(state->pcg_state, seed, inc); +} diff --git a/numpy/random/src/pcg32/pcg32.h b/numpy/random/src/pcg32/pcg32.h new file mode 100644 index 000000000..557113d8f --- /dev/null +++ b/numpy/random/src/pcg32/pcg32.h @@ -0,0 +1,89 @@ +#ifndef _RANDOMDGEN__PCG32_H_ +#define _RANDOMDGEN__PCG32_H_ + +#ifdef _WIN32 +#ifndef _INTTYPES +#include "../common/stdint.h" +#endif +#define inline __inline __forceinline +#else +#include <inttypes.h> +#endif + +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +static inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) { +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorl %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 31)); +#endif +} + +static inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +static inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) { + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +static inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate; + oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +static inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64 *rng, + uint64_t initstate, + uint64_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} + +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +static inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64 *rng, + uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + rng->inc); +} + +typedef struct pcg_state_setseq_64 pcg32_random_t; +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32_advance_r pcg_setseq_64_advance_r + +typedef struct s_pcg32_state { pcg32_random_t *pcg_state; } pcg32_state; + +static inline uint64_t pcg32_next64(pcg32_state *state) { + return (uint64_t)(pcg32_random_r(state->pcg_state)) << 32 | + pcg32_random_r(state->pcg_state); +} + +static inline uint32_t pcg32_next32(pcg32_state *state) { + return pcg32_random_r(state->pcg_state); +} + +static inline double pcg32_next_double(pcg32_state *state) { + int32_t a = pcg32_random_r(state->pcg_state) >> 5, + b = pcg32_random_r(state->pcg_state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +void pcg32_advance_state(pcg32_state *state, uint64_t step); +void pcg32_set_seed(pcg32_state *state, uint64_t seed, uint64_t inc); + +#endif diff --git a/numpy/random/src/pcg32/pcg_variants.h b/numpy/random/src/pcg32/pcg_variants.h new file mode 100644 index 000000000..32daac1ce --- /dev/null +++ b/numpy/random/src/pcg32/pcg_variants.h @@ -0,0 +1,2210 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Much of the derivation was performed mechanically. In particular, the + * output functions were generated by compiling the C++ output functions + * into LLVM bitcode and then transforming that using the LLVM C backend + * (from https://github.com/draperlaboratory/llvm-cbe), and then + * postprocessing and hand editing the output. + * + * Much of the remaining code was generated by C-preprocessor metaprogramming. + */ + +#ifndef PCG_VARIANTS_H_INCLUDED +#define PCG_VARIANTS_H_INCLUDED 1 + +#include <inttypes.h> + +#if __SIZEOF_INT128__ + typedef __uint128_t pcg128_t; + #define PCG_128BIT_CONSTANT(high,low) \ + ((((pcg128_t)high) << 64) + low) + #define PCG_HAS_128BIT_OPS 1 +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) + #error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. + // We could instead use macros PCG_INLINE and PCG_EXTERN_INLINE + // but better to just reject ancient C code. +#endif + +#if __cplusplus +extern "C" { +#endif + +/* + * Rotate helper functions. + */ + +inline uint8_t pcg_rotr_8(uint8_t value, unsigned int rot) +{ +/* Unfortunately, clang is kinda pathetic when it comes to properly + * recognizing idiomatic rotate code, so for clang we actually provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + */ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorb %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 7)); +#endif +} + +inline uint16_t pcg_rotr_16(uint16_t value, unsigned int rot) +{ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorw %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 15)); +#endif +} + +inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) +{ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm ("rorl %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 31)); +#endif +} + +inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) +{ +#if 0 && PCG_USE_INLINE_ASM && __clang__ && __x86_64__ + // For whatever reason, clang actually *does* generate rotq by + // itself, so we don't need this code. + asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((- rot) & 63)); +#endif +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_rotr_128(pcg128_t value, unsigned int rot) +{ + return (value >> rot) | (value << ((- rot) & 127)); +} +#endif + +/* + * Output functions. These are the core of the PCG generation scheme. + */ + +// XSH RS + +inline uint8_t pcg_output_xsh_rs_16_8(uint16_t state) +{ + return (uint8_t)(((state >> 7u) ^ state) >> ((state >> 14u) + 3u)); +} + +inline uint16_t pcg_output_xsh_rs_32_16(uint32_t state) +{ + return (uint16_t)(((state >> 11u) ^ state) >> ((state >> 30u) + 11u)); +} + +inline uint32_t pcg_output_xsh_rs_64_32(uint64_t state) +{ + + return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rs_128_64(pcg128_t state) +{ + return (uint64_t)(((state >> 43u) ^ state) >> ((state >> 124u) + 45u)); +} +#endif + +// XSH RR + +inline uint8_t pcg_output_xsh_rr_16_8(uint16_t state) +{ + return pcg_rotr_8(((state >> 5u) ^ state) >> 5u, state >> 13u); +} + +inline uint16_t pcg_output_xsh_rr_32_16(uint32_t state) +{ + return pcg_rotr_16(((state >> 10u) ^ state) >> 12u, state >> 28u); +} + +inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) +{ + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rr_128_64(pcg128_t state) +{ + return pcg_rotr_64(((state >> 29u) ^ state) >> 58u, state >> 122u); +} +#endif + +// RXS M XS + +inline uint8_t pcg_output_rxs_m_xs_8_8(uint8_t state) +{ + uint8_t word = ((state >> ((state >> 6u) + 2u)) ^ state) * 217u; + return (word >> 6u) ^ word; +} + +inline uint16_t pcg_output_rxs_m_xs_16_16(uint16_t state) +{ + uint16_t word = ((state >> ((state >> 13u) + 3u)) ^ state) * 62169u; + return (word >> 11u) ^ word; +} + +inline uint32_t pcg_output_rxs_m_xs_32_32(uint32_t state) +{ + uint32_t word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +inline uint64_t pcg_output_rxs_m_xs_64_64(uint64_t state) +{ + uint64_t word = ((state >> ((state >> 59u) + 5u)) ^ state) + * 12605985483714917081ull; + return (word >> 43u) ^ word; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_rxs_m_xs_128_128(pcg128_t state) +{ + pcg128_t word = ((state >> ((state >> 122u) + 6u)) ^ state) + * (PCG_128BIT_CONSTANT(17766728186571221404ULL, + 12605985483714917081ULL)); + // 327738287884841127335028083622016905945 + return (word >> 86u) ^ word; +} +#endif + +// XSL RR (only defined for >= 64 bits) + +inline uint32_t pcg_output_xsl_rr_64_32(uint64_t state) +{ + return pcg_rotr_32(((uint32_t)(state >> 32u)) ^ (uint32_t)state, + state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) +{ + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} +#endif + +// XSL RR RR (only defined for >= 64 bits) + +inline uint64_t pcg_output_xsl_rr_rr_64_64(uint64_t state) +{ + uint32_t rot1 = (uint32_t)(state >> 59u); + uint32_t high = (uint32_t)(state >> 32u); + uint32_t low = (uint32_t)state; + uint32_t xored = high ^ low; + uint32_t newlow = pcg_rotr_32(xored, rot1); + uint32_t newhigh = pcg_rotr_32(high, newlow & 31u); + return (((uint64_t)newhigh) << 32u) | newlow; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_xsl_rr_rr_128_128(pcg128_t state) +{ + uint32_t rot1 = (uint32_t)(state >> 122u); + uint64_t high = (uint64_t)(state >> 64u); + uint64_t low = (uint64_t)state; + uint64_t xored = high ^ low; + uint64_t newlow = pcg_rotr_64(xored, rot1); + uint64_t newhigh = pcg_rotr_64(high, newlow & 63u); + return (((pcg128_t)newhigh) << 64u) | newlow; +} +#endif + +#define PCG_DEFAULT_MULTIPLIER_8 141U +#define PCG_DEFAULT_MULTIPLIER_16 12829U +#define PCG_DEFAULT_MULTIPLIER_32 747796405U +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +#define PCG_DEFAULT_INCREMENT_8 77U +#define PCG_DEFAULT_INCREMENT_16 47989U +#define PCG_DEFAULT_INCREMENT_32 2891336453U +#define PCG_DEFAULT_INCREMENT_64 1442695040888963407ULL + +#if PCG_HAS_128BIT_OPS +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL,4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL,1442695040888963407ULL) +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG_STATE_ONESEQ_8_INITIALIZER { 0xd7U } +#define PCG_STATE_ONESEQ_16_INITIALIZER { 0x20dfU } +#define PCG_STATE_ONESEQ_32_INITIALIZER { 0x46b56677U } +#define PCG_STATE_ONESEQ_64_INITIALIZER { 0x4d595df4d0f33173ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_ONESEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0xb8dc10e158a92392ULL, 0x98046df007ec0a53ULL) } +#endif + +#define PCG_STATE_UNIQUE_8_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG_STATE_UNIQUE_16_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG_STATE_UNIQUE_32_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG_STATE_UNIQUE_64_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_UNIQUE_128_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG_STATE_MCG_8_INITIALIZER { 0xe5U } +#define PCG_STATE_MCG_16_INITIALIZER { 0xa5e5U } +#define PCG_STATE_MCG_32_INITIALIZER { 0xd15ea5e5U } +#define PCG_STATE_MCG_64_INITIALIZER { 0xcafef00dd15ea5e5ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_MCG_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x0000000000000000ULL, 0xcafef00dd15ea5e5ULL) } +#endif + +#define PCG_STATE_SETSEQ_8_INITIALIZER { 0x9bU, 0xdbU } +#define PCG_STATE_SETSEQ_16_INITIALIZER { 0xe39bU, 0x5bdbU } +#define PCG_STATE_SETSEQ_32_INITIALIZER { 0xec02d89bU, 0x94b95bdbU } +#define PCG_STATE_SETSEQ_64_INITIALIZER \ + { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_SETSEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL), \ + PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) } +#endif + +/* Representations for the oneseq, mcg, and unique variants */ + +struct pcg_state_8 { + uint8_t state; +}; + +struct pcg_state_16 { + uint16_t state; +}; + +struct pcg_state_32 { + uint32_t state; +}; + +struct pcg_state_64 { + uint64_t state; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_128 { + pcg128_t state; +}; +#endif + +/* Representations setseq variants */ + +struct pcg_state_setseq_8 { + uint8_t state; + uint8_t inc; +}; + +struct pcg_state_setseq_16 { + uint16_t state; + uint16_t inc; +}; + +struct pcg_state_setseq_32 { + uint32_t state; + uint32_t inc; +}; + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_setseq_128 { + pcg128_t state; + pcg128_t inc; +}; +#endif + +/* Multi-step advance functions (jump-ahead, jump-back) */ + +extern uint8_t pcg_advance_lcg_8(uint8_t state, uint8_t delta, uint8_t cur_mult, + uint8_t cur_plus); +extern uint16_t pcg_advance_lcg_16(uint16_t state, uint16_t delta, + uint16_t cur_mult, uint16_t cur_plus); +extern uint32_t pcg_advance_lcg_32(uint32_t state, uint32_t delta, + uint32_t cur_mult, uint32_t cur_plus); +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +#if PCG_HAS_128BIT_OPS +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); +#endif + +/* Functions to advance the underlying LCG, one version for each size and + * each style. These functions are considered semi-private. There is rarely + * a good reason to call them directly. + */ + +inline void pcg_oneseq_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + + PCG_DEFAULT_INCREMENT_8; +} + +inline void pcg_oneseq_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + PCG_DEFAULT_INCREMENT_8); +} + +inline void pcg_mcg_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8; +} + +inline void pcg_mcg_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state + = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, 0u); +} + +inline void pcg_unique_8_step_r(struct pcg_state_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + + (uint8_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_8_advance_r(struct pcg_state_8* rng, uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + (uint8_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_8_step_r(struct pcg_state_setseq_8* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + rng->inc; +} + +inline void pcg_setseq_8_advance_r(struct pcg_state_setseq_8* rng, + uint8_t delta) +{ + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + rng->inc); +} + +inline void pcg_oneseq_16_step_r(struct pcg_state_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + + PCG_DEFAULT_INCREMENT_16; +} + +inline void pcg_oneseq_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state = pcg_advance_lcg_16( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, PCG_DEFAULT_INCREMENT_16); +} + +inline void pcg_mcg_16_step_r(struct pcg_state_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16; +} + +inline void pcg_mcg_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state + = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, 0u); +} + +inline void pcg_unique_16_step_r(struct pcg_state_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + + (uint16_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_16_advance_r(struct pcg_state_16* rng, uint16_t delta) +{ + rng->state + = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + (uint16_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_16_step_r(struct pcg_state_setseq_16* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + rng->inc; +} + +inline void pcg_setseq_16_advance_r(struct pcg_state_setseq_16* rng, + uint16_t delta) +{ + rng->state = pcg_advance_lcg_16(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_16, rng->inc); +} + +inline void pcg_oneseq_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + + PCG_DEFAULT_INCREMENT_32; +} + +inline void pcg_oneseq_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state = pcg_advance_lcg_32( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, PCG_DEFAULT_INCREMENT_32); +} + +inline void pcg_mcg_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32; +} + +inline void pcg_mcg_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state + = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, 0u); +} + +inline void pcg_unique_32_step_r(struct pcg_state_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + + (uint32_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_32_advance_r(struct pcg_state_32* rng, uint32_t delta) +{ + rng->state + = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + (uint32_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_32_step_r(struct pcg_state_setseq_32* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + rng->inc; +} + +inline void pcg_setseq_32_advance_r(struct pcg_state_setseq_32* rng, + uint32_t delta) +{ + rng->state = pcg_advance_lcg_32(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_32, rng->inc); +} + +inline void pcg_oneseq_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + + PCG_DEFAULT_INCREMENT_64; +} + +inline void pcg_oneseq_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state = pcg_advance_lcg_64( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, PCG_DEFAULT_INCREMENT_64); +} + +inline void pcg_mcg_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64; +} + +inline void pcg_mcg_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state + = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, 0u); +} + +inline void pcg_unique_64_step_r(struct pcg_state_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + + (uint64_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_64_advance_r(struct pcg_state_64* rng, uint64_t delta) +{ + rng->state + = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + (uint64_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64* rng, + uint64_t delta) +{ + rng->state = pcg_advance_lcg_64(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_64, rng->inc); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + PCG_DEFAULT_INCREMENT_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_advance_r(struct pcg_state_128* rng, pcg128_t delta) +{ + rng->state + = pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + PCG_DEFAULT_INCREMENT_128); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_advance_r(struct pcg_state_128* rng, pcg128_t delta) +{ + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, 0u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_step_r(struct pcg_state_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + (pcg128_t)(((intptr_t)rng) | 1u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_advance_r(struct pcg_state_128* rng, pcg128_t delta) +{ + rng->state + = pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + (pcg128_t)(((intptr_t)rng) | 1u)); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_step_r(struct pcg_state_setseq_128* rng) +{ + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_advance_r(struct pcg_state_setseq_128* rng, + pcg128_t delta) +{ + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} +#endif + +/* Functions to seed the RNG state, one version for each size and each + * style. Unlike the step functions, regular users can and should call + * these functions. + */ + +inline void pcg_oneseq_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = 0U; + pcg_oneseq_8_step_r(rng); + rng->state += initstate; + pcg_oneseq_8_step_r(rng); +} + +inline void pcg_mcg_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_8_srandom_r(struct pcg_state_8* rng, uint8_t initstate) +{ + rng->state = 0U; + pcg_unique_8_step_r(rng); + rng->state += initstate; + pcg_unique_8_step_r(rng); +} + +inline void pcg_setseq_8_srandom_r(struct pcg_state_setseq_8* rng, + uint8_t initstate, uint8_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_8_step_r(rng); + rng->state += initstate; + pcg_setseq_8_step_r(rng); +} + +inline void pcg_oneseq_16_srandom_r(struct pcg_state_16* rng, + uint16_t initstate) +{ + rng->state = 0U; + pcg_oneseq_16_step_r(rng); + rng->state += initstate; + pcg_oneseq_16_step_r(rng); +} + +inline void pcg_mcg_16_srandom_r(struct pcg_state_16* rng, uint16_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_16_srandom_r(struct pcg_state_16* rng, + uint16_t initstate) +{ + rng->state = 0U; + pcg_unique_16_step_r(rng); + rng->state += initstate; + pcg_unique_16_step_r(rng); +} + +inline void pcg_setseq_16_srandom_r(struct pcg_state_setseq_16* rng, + uint16_t initstate, uint16_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_16_step_r(rng); + rng->state += initstate; + pcg_setseq_16_step_r(rng); +} + +inline void pcg_oneseq_32_srandom_r(struct pcg_state_32* rng, + uint32_t initstate) +{ + rng->state = 0U; + pcg_oneseq_32_step_r(rng); + rng->state += initstate; + pcg_oneseq_32_step_r(rng); +} + +inline void pcg_mcg_32_srandom_r(struct pcg_state_32* rng, uint32_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_32_srandom_r(struct pcg_state_32* rng, + uint32_t initstate) +{ + rng->state = 0U; + pcg_unique_32_step_r(rng); + rng->state += initstate; + pcg_unique_32_step_r(rng); +} + +inline void pcg_setseq_32_srandom_r(struct pcg_state_setseq_32* rng, + uint32_t initstate, uint32_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_32_step_r(rng); + rng->state += initstate; + pcg_setseq_32_step_r(rng); +} + +inline void pcg_oneseq_64_srandom_r(struct pcg_state_64* rng, + uint64_t initstate) +{ + rng->state = 0U; + pcg_oneseq_64_step_r(rng); + rng->state += initstate; + pcg_oneseq_64_step_r(rng); +} + +inline void pcg_mcg_64_srandom_r(struct pcg_state_64* rng, uint64_t initstate) +{ + rng->state = initstate | 1u; +} + +inline void pcg_unique_64_srandom_r(struct pcg_state_64* rng, + uint64_t initstate) +{ + rng->state = 0U; + pcg_unique_64_step_r(rng); + rng->state += initstate; + pcg_unique_64_step_r(rng); +} + +inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64* rng, + uint64_t initstate, uint64_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_srandom_r(struct pcg_state_128* rng, + pcg128_t initstate) +{ + rng->state = 0U; + pcg_oneseq_128_step_r(rng); + rng->state += initstate; + pcg_oneseq_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_srandom_r(struct pcg_state_128* rng, pcg128_t initstate) +{ + rng->state = initstate | 1u; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_srandom_r(struct pcg_state_128* rng, + pcg128_t initstate) +{ + rng->state = 0U; + pcg_unique_128_step_r(rng); + rng->state += initstate; + pcg_unique_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_srandom_r(struct pcg_state_setseq_128* rng, + pcg128_t initstate, pcg128_t initseq) +{ + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} +#endif + +/* Now, finally we create each of the individual generators. We provide + * a random_r function that provides a random number of the appropriate + * type (using the full range of the type) and a boundedrand_r version + * that provides + * + * Implementation notes for boundedrand_r: + * + * To avoid bias, we need to make the range of the RNG a multiple of + * bound, which we do by dropping output less than a threshold. + * Let's consider a 32-bit case... A naive scheme to calculate the + * threshold would be to do + * + * uint32_t threshold = 0x100000000ull % bound; + * + * but 64-bit div/mod is slower than 32-bit div/mod (especially on + * 32-bit platforms). In essence, we do + * + * uint32_t threshold = (0x100000000ull-bound) % bound; + * + * because this version will calculate the same modulus, but the LHS + * value is less than 2^32. + * + * (Note that using modulo is only wise for good RNGs, poorer RNGs + * such as raw LCGs do better using a technique based on division.) + * Empricical tests show that division is preferable to modulus for + * reducting the range of an RNG. It's faster, and sometimes it can + * even be statistically prefereable. + */ + +/* Generation functions for XSH RS */ + +inline uint8_t pcg_oneseq_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_16_xsh_rs_8_random_r(struct pcg_state_setseq_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_setseq_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_setseq_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rs_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rs_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSH RR */ + +inline uint8_t pcg_oneseq_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_16_xsh_rr_8_random_r(struct pcg_state_setseq_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_setseq_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_setseq_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rr_8_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rr_8_boundedrand_r(struct pcg_state_16* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_boundedrand_r(struct pcg_state_32* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for RXS M XS (no MCG versions because they + * don't make sense when you want to use the entire state) + */ + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_random_r(struct pcg_state_8* rng) +{ + uint8_t oldstate = rng->state; + pcg_oneseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_8* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_16_rxs_m_xs_16_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_oneseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_32_rxs_m_xs_32_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_oneseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_oneseq_64_rxs_m_xs_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_oneseq_128_rxs_m_xs_128_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint16_t pcg_unique_16_rxs_m_xs_16_random_r(struct pcg_state_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_unique_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_32_rxs_m_xs_32_random_r(struct pcg_state_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_unique_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_unique_64_rxs_m_xs_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_unique_128_rxs_m_xs_128_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_setseq_8_rxs_m_xs_8_random_r(struct pcg_state_setseq_8* rng) +{ + uint8_t oldstate = rng->state; + pcg_setseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_setseq_8* rng, + uint8_t bound) +{ + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_random_r(struct pcg_state_setseq_16* rng) +{ + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_setseq_16* rng, + uint16_t bound) +{ + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_random_r(struct pcg_state_setseq_32* rng) +{ + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_setseq_32* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_setseq_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_setseq_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR (only defined for "large" types) */ + +inline uint32_t pcg_oneseq_64_xsl_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_unique_64_xsl_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t +pcg_setseq_64_xsl_rr_32_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_setseq_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_setseq_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_mcg_64_xsl_rr_32_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsl_rr_32_boundedrand_r(struct pcg_state_64* rng, + uint32_t bound) +{ + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_random_r(struct pcg_state_128* rng) +{ + pcg_mcg_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_boundedrand_r(struct pcg_state_128* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR RR (only defined for "large" types) */ + +inline uint64_t pcg_oneseq_64_xsl_rr_rr_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_oneseq_128_xsl_rr_rr_128_random_r(struct pcg_state_128* rng) +{ + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t pcg_unique_64_xsl_rr_rr_64_random_r(struct pcg_state_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_unique_128_xsl_rr_rr_128_random_r(struct pcg_state_128* rng) +{ + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_random_r(struct pcg_state_setseq_64* rng) +{ + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_setseq_64* rng, + uint64_t bound) +{ + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_random_r(struct pcg_state_setseq_128* rng) +{ + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_setseq_128* rng, + pcg128_t bound) +{ + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +//// Typedefs +typedef struct pcg_state_setseq_64 pcg32_random_t; +typedef struct pcg_state_64 pcg32s_random_t; +typedef struct pcg_state_64 pcg32u_random_t; +typedef struct pcg_state_64 pcg32f_random_t; +//// random_r +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32s_random_r pcg_oneseq_64_xsh_rr_32_random_r +#define pcg32u_random_r pcg_unique_64_xsh_rr_32_random_r +#define pcg32f_random_r pcg_mcg_64_xsh_rs_32_random_r +//// boundedrand_r +#define pcg32_boundedrand_r pcg_setseq_64_xsh_rr_32_boundedrand_r +#define pcg32s_boundedrand_r pcg_oneseq_64_xsh_rr_32_boundedrand_r +#define pcg32u_boundedrand_r pcg_unique_64_xsh_rr_32_boundedrand_r +#define pcg32f_boundedrand_r pcg_mcg_64_xsh_rs_32_boundedrand_r +//// srandom_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32s_srandom_r pcg_oneseq_64_srandom_r +#define pcg32u_srandom_r pcg_unique_64_srandom_r +#define pcg32f_srandom_r pcg_mcg_64_srandom_r +//// advance_r +#define pcg32_advance_r pcg_setseq_64_advance_r +#define pcg32s_advance_r pcg_oneseq_64_advance_r +#define pcg32u_advance_r pcg_unique_64_advance_r +#define pcg32f_advance_r pcg_mcg_64_advance_r + +#if PCG_HAS_128BIT_OPS +//// Typedefs +typedef struct pcg_state_setseq_128 pcg64_random_t; +typedef struct pcg_state_128 pcg64s_random_t; +typedef struct pcg_state_128 pcg64u_random_t; +typedef struct pcg_state_128 pcg64f_random_t; +//// random_r +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64s_random_r pcg_oneseq_128_xsl_rr_64_random_r +#define pcg64u_random_r pcg_unique_128_xsl_rr_64_random_r +#define pcg64f_random_r pcg_mcg_128_xsl_rr_64_random_r +//// boundedrand_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64s_boundedrand_r pcg_oneseq_128_xsl_rr_64_boundedrand_r +#define pcg64u_boundedrand_r pcg_unique_128_xsl_rr_64_boundedrand_r +#define pcg64f_boundedrand_r pcg_mcg_128_xsl_rr_64_boundedrand_r +//// srandom_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64s_srandom_r pcg_oneseq_128_srandom_r +#define pcg64u_srandom_r pcg_unique_128_srandom_r +#define pcg64f_srandom_r pcg_mcg_128_srandom_r +//// advance_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define pcg64s_advance_r pcg_oneseq_128_advance_r +#define pcg64u_advance_r pcg_unique_128_advance_r +#define pcg64f_advance_r pcg_mcg_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_8 pcg8si_random_t; +typedef struct pcg_state_16 pcg16si_random_t; +typedef struct pcg_state_32 pcg32si_random_t; +typedef struct pcg_state_64 pcg64si_random_t; +//// random_r +#define pcg8si_random_r pcg_oneseq_8_rxs_m_xs_8_random_r +#define pcg16si_random_r pcg_oneseq_16_rxs_m_xs_16_random_r +#define pcg32si_random_r pcg_oneseq_32_rxs_m_xs_32_random_r +#define pcg64si_random_r pcg_oneseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8si_boundedrand_r pcg_oneseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16si_boundedrand_r pcg_oneseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32si_boundedrand_r pcg_oneseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64si_boundedrand_r pcg_oneseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8si_srandom_r pcg_oneseq_8_srandom_r +#define pcg16si_srandom_r pcg_oneseq_16_srandom_r +#define pcg32si_srandom_r pcg_oneseq_32_srandom_r +#define pcg64si_srandom_r pcg_oneseq_64_srandom_r +//// advance_r +#define pcg8si_advance_r pcg_oneseq_8_advance_r +#define pcg16si_advance_r pcg_oneseq_16_advance_r +#define pcg32si_advance_r pcg_oneseq_32_advance_r +#define pcg64si_advance_r pcg_oneseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_128 pcg128si_random_t; +#define pcg128si_random_r pcg_oneseq_128_rxs_m_xs_128_random_r +#define pcg128si_boundedrand_r pcg_oneseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128si_srandom_r pcg_oneseq_128_srandom_r +#define pcg128si_advance_r pcg_oneseq_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_setseq_8 pcg8i_random_t; +typedef struct pcg_state_setseq_16 pcg16i_random_t; +typedef struct pcg_state_setseq_32 pcg32i_random_t; +typedef struct pcg_state_setseq_64 pcg64i_random_t; +//// random_r +#define pcg8i_random_r pcg_setseq_8_rxs_m_xs_8_random_r +#define pcg16i_random_r pcg_setseq_16_rxs_m_xs_16_random_r +#define pcg32i_random_r pcg_setseq_32_rxs_m_xs_32_random_r +#define pcg64i_random_r pcg_setseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8i_boundedrand_r pcg_setseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16i_boundedrand_r pcg_setseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32i_boundedrand_r pcg_setseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64i_boundedrand_r pcg_setseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8i_srandom_r pcg_setseq_8_srandom_r +#define pcg16i_srandom_r pcg_setseq_16_srandom_r +#define pcg32i_srandom_r pcg_setseq_32_srandom_r +#define pcg64i_srandom_r pcg_setseq_64_srandom_r +//// advance_r +#define pcg8i_advance_r pcg_setseq_8_advance_r +#define pcg16i_advance_r pcg_setseq_16_advance_r +#define pcg32i_advance_r pcg_setseq_32_advance_r +#define pcg64i_advance_r pcg_setseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_setseq_128 pcg128i_random_t; +#define pcg128i_random_r pcg_setseq_128_rxs_m_xs_128_random_r +#define pcg128i_boundedrand_r pcg_setseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128i_srandom_r pcg_setseq_128_srandom_r +#define pcg128i_advance_r pcg_setseq_128_advance_r +#endif + +extern uint32_t pcg32_random(); +extern uint32_t pcg32_boundedrand(uint32_t bound); +extern void pcg32_srandom(uint64_t seed, uint64_t seq); +extern void pcg32_advance(uint64_t delta); + +#if PCG_HAS_128BIT_OPS +extern uint64_t pcg64_random(); +extern uint64_t pcg64_boundedrand(uint64_t bound); +extern void pcg64_srandom(pcg128_t seed, pcg128_t seq); +extern void pcg64_advance(pcg128_t delta); +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG32_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#define PCG32U_INITIALIZER PCG_STATE_UNIQUE_64_INITIALIZER +#define PCG32S_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#define PCG32F_INITIALIZER PCG_STATE_MCG_64_INITIALIZER + +#if PCG_HAS_128BIT_OPS +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#define PCG64U_INITIALIZER PCG_STATE_UNIQUE_128_INITIALIZER +#define PCG64S_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#define PCG64F_INITIALIZER PCG_STATE_MCG_128_INITIALIZER +#endif + +#define PCG8SI_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG16SI_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG32SI_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG64SI_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128SI_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG8I_INITIALIZER PCG_STATE_SETSEQ_8_INITIALIZER +#define PCG16I_INITIALIZER PCG_STATE_SETSEQ_16_INITIALIZER +#define PCG32I_INITIALIZER PCG_STATE_SETSEQ_32_INITIALIZER +#define PCG64I_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128I_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#endif + +#if __cplusplus +} +#endif + +#endif // PCG_VARIANTS_H_INCLUDED diff --git a/numpy/random/src/pcg64/LICENSE.md b/numpy/random/src/pcg64/LICENSE.md new file mode 100644 index 000000000..dd6a17ee8 --- /dev/null +++ b/numpy/random/src/pcg64/LICENSE.md @@ -0,0 +1,22 @@ +# PCG64 + +PCG Random Number Generation for C. + +Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. + +For additional information about the PCG random number generation scheme, +including its license and other licensing options, visit + + http://www.pcg-random.org diff --git a/numpy/random/src/pcg64/pcg64-benchmark.c b/numpy/random/src/pcg64/pcg64-benchmark.c new file mode 100644 index 000000000..76f3ec78c --- /dev/null +++ b/numpy/random/src/pcg64/pcg64-benchmark.c @@ -0,0 +1,42 @@ +/* + * cl pcg64-benchmark.c pcg64.c ../splitmix64/splitmix64.c /Ox + * Measure-Command { .\xoroshiro128-benchmark.exe } + * + * gcc pcg64-benchmark.c pcg64.c ../splitmix64/splitmix64.c -O3 -o + * pcg64-benchmark + * time ./pcg64-benchmark + */ +#include "../splitmix64/splitmix64.h" +#include "pcg64.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { + pcg64_random_t rng; + uint64_t sum = 0, count = 0; + uint64_t seed = 0xDEADBEAF; + int i; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + rng.state = (__uint128_t)splitmix64_next(&seed) << 64; + rng.state |= splitmix64_next(&seed); + rng.inc = (__uint128_t)1; +#else + rng.state.high = splitmix64_next(&seed); + rng.state.low = splitmix64_next(&seed); + rng.inc.high = 0; + rng.inc.low = 1; +#endif + clock_t begin = clock(); + for (i = 0; i < N; i++) { + sum += pcg64_random_r(&rng); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/pcg64/pcg64-test-data-gen.c b/numpy/random/src/pcg64/pcg64-test-data-gen.c new file mode 100644 index 000000000..0c2b079a3 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64-test-data-gen.c @@ -0,0 +1,73 @@ +/* + * Generate testing csv files + * + * GCC only + * + * gcc pcg64-test-data-gen.c pcg64.orig.c ../splitmix64/splitmix64.c -o + * pgc64-test-data-gen + */ + +#include "pcg64.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + pcg64_random_t rng; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + __uint128_t temp, s, inc; + int i; + uint64_t store[N]; + s = (__uint128_t)seed; + inc = (__uint128_t)0; + pcg64_srandom_r(&rng, s, inc); + printf("0x%" PRIx64, (uint64_t)(rng.state >> 64)); + printf("%" PRIx64 "\n", (uint64_t)rng.state); + printf("0x%" PRIx64, (uint64_t)(rng.inc >> 64)); + printf("%" PRIx64 "\n", (uint64_t)rng.inc); + for (i = 0; i < N; i++) { + store[i] = pcg64_random_r(&rng); + } + + FILE *fp; + fp = fopen("pcg64-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + state = seed = 0; + s = (__uint128_t)seed; + i = (__uint128_t)0; + pcg64_srandom_r(&rng, s, i); + printf("0x%" PRIx64, (uint64_t)(rng.state >> 64)); + printf("%" PRIx64 "\n", (uint64_t)rng.state); + printf("0x%" PRIx64, (uint64_t)(rng.inc >> 64)); + printf("%" PRIx64 "\n", (uint64_t)rng.inc); + for (i = 0; i < N; i++) { + store[i] = pcg64_random_r(&rng); + } + fp = fopen("pcg64-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/pcg64/pcg64.c b/numpy/random/src/pcg64/pcg64.c new file mode 100644 index 000000000..c7c1eb045 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.c @@ -0,0 +1,118 @@ +/* + * PCG64 Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * Copyright 2015 Robert Kern <robert.kern@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#include "pcg64.h" + +extern inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng); +extern inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state); +extern inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq); +extern inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng); +extern inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng, + uint64_t bound); +extern inline void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng, + pcg128_t delta); + +/* Multi-step advance functions (jump-ahead, jump-back) + * + * The method used here is based on Brown, "Random Number Generation + * with Arbitrary Stride,", Transactions of the American Nuclear + * Society (Nov. 1994). The algorithm is very similar to fast + * exponentiation. + * + * Even though delta is an unsigned integer, we can pass a + * signed integer to go backwards, it just goes "the long way round". + */ + +#ifndef PCG_EMULATED_128BIT_MATH + +pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, pcg128_t cur_mult, + pcg128_t cur_plus) { + pcg128_t acc_mult = 1u; + pcg128_t acc_plus = 0u; + while (delta > 0) { + if (delta & 1) { + acc_mult *= cur_mult; + acc_plus = acc_plus * cur_mult + cur_plus; + } + cur_plus = (cur_mult + 1) * cur_plus; + cur_mult *= cur_mult; + delta /= 2; + } + return acc_mult * state + acc_plus; +} + +#else + +pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, pcg128_t cur_mult, + pcg128_t cur_plus) { + pcg128_t acc_mult = PCG_128BIT_CONSTANT(0u, 1u); + pcg128_t acc_plus = PCG_128BIT_CONSTANT(0u, 0u); + while ((delta.high > 0) || (delta.low > 0)) { + if (delta.low & 1) { + acc_mult = _pcg128_mult(acc_mult, cur_mult); + acc_plus = _pcg128_add(_pcg128_mult(acc_plus, cur_mult), cur_plus); + } + cur_plus = _pcg128_mult(_pcg128_add(cur_mult, PCG_128BIT_CONSTANT(0u, 1u)), + cur_plus); + cur_mult = _pcg128_mult(cur_mult, cur_mult); + delta.low >>= 1; + delta.low += delta.high & 1; + delta.high >>= 1; + } + return _pcg128_add(_pcg128_mult(acc_mult, state), acc_plus); +} + +#endif + +extern inline uint64_t pcg64_next64(pcg64_state *state); +extern inline uint32_t pcg64_next32(pcg64_state *state); + +extern void pcg64_advance(pcg64_state *state, uint64_t *step) { + pcg128_t delta; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + delta = (((pcg128_t)step[0]) << 64) | step[1]; +#else + delta.high = step[0]; + delta.low = step[1]; +#endif + pcg64_advance_r(state->pcg_state, delta); +} + +extern void pcg64_set_seed(pcg64_state *state, uint64_t *seed, uint64_t *inc) { + pcg128_t s, i; +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) + s = (((pcg128_t)seed[0]) << 64) | seed[1]; + i = (((pcg128_t)inc[0]) << 64) | inc[1]; +#else + s.high = seed[0]; + s.low = seed[1]; + i.high = inc[0]; + i.low = inc[1]; +#endif + pcg64_srandom_r(state->pcg_state, s, i); +} diff --git a/numpy/random/src/pcg64/pcg64.h b/numpy/random/src/pcg64/pcg64.h new file mode 100644 index 000000000..156c73a36 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.h @@ -0,0 +1,241 @@ +/* + * PCG64 Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * Copyright 2015 Robert Kern <robert.kern@gmail.com> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +#ifndef PCG64_H_INCLUDED +#define PCG64_H_INCLUDED 1 + +#ifdef _WIN32 +#ifndef _INTTYPES +#include "../common/stdint.h" +#endif +#define inline __inline __forceinline +#else +#include <inttypes.h> +#if _MSC_VER >= 1900 && _M_AMD64 +#include <intrin.h> +#pragma intrinsic(_umul128) +#endif +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) +#error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. +#endif + +#if __cplusplus +extern "C" { +#endif + +#if __SIZEOF_INT128__ && !defined(PCG_FORCE_EMULATED_128BIT_MATH) +typedef __uint128_t pcg128_t; +#define PCG_128BIT_CONSTANT(high, low) (((pcg128_t)(high) << 64) + low) +#else +typedef struct { + uint64_t high; + uint64_t low; +} pcg128_t; + +static inline pcg128_t PCG_128BIT_CONSTANT(uint64_t high, uint64_t low) { + pcg128_t result; + result.high = high; + result.low = low; + return result; +} + +#define PCG_EMULATED_128BIT_MATH 1 +#endif + +typedef struct { pcg128_t state; } pcg_state_128; + +typedef struct { + pcg128_t state; + pcg128_t inc; +} pcg_state_setseq_128; + +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL) +#define PCG_STATE_SETSEQ_128_INITIALIZER \ + { \ + PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL) \ + , PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) \ + } + +static inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) { + return (value >> rot) | (value << ((-rot) & 63)); +} + +#ifdef PCG_EMULATED_128BIT_MATH + +static inline pcg128_t _pcg128_add(pcg128_t a, pcg128_t b) { + pcg128_t result; + + result.low = a.low + b.low; + result.high = a.high + b.high + (result.low < b.low); + return result; +} + +static inline void _pcg_mult64(uint64_t x, uint64_t y, uint64_t *z1, + uint64_t *z0) { + +#if defined _WIN32 && _MSC_VER >= 1900 && _M_AMD64 + z0[0] = _umul128(x, y, z1); +#else + uint64_t x0, x1, y0, y1; + uint64_t w0, w1, w2, t; + /* Lower 64 bits are straightforward clock-arithmetic. */ + *z0 = x * y; + + x0 = x & 0xFFFFFFFFULL; + x1 = x >> 32; + y0 = y & 0xFFFFFFFFULL; + y1 = y >> 32; + w0 = x0 * y0; + t = x1 * y0 + (w0 >> 32); + w1 = t & 0xFFFFFFFFULL; + w2 = t >> 32; + w1 += x0 * y1; + *z1 = x1 * y1 + w2 + (w1 >> 32); +#endif + +} + +static inline pcg128_t _pcg128_mult(pcg128_t a, pcg128_t b) { + uint64_t h1; + pcg128_t result; + + h1 = a.high * b.low + a.low * b.high; + _pcg_mult64(a.low, b.low, &(result.high), &(result.low)); + result.high += h1; + return result; +} + +static inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng) { + rng->state = _pcg128_add(_pcg128_mult(rng->state, PCG_DEFAULT_MULTIPLIER_128), + rng->inc); +} + +static inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(state.high ^ state.low, state.high >> 58u); +} + +static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq) { + rng->state = PCG_128BIT_CONSTANT(0ULL, 0ULL); + rng->inc.high = initseq.high << 1u; + rng->inc.high |= initseq.low & 0x800000000000ULL; + rng->inc.low = (initseq.low << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state = _pcg128_add(rng->state, initstate); + pcg_setseq_128_step_r(rng); +} + +#else /* PCG_EMULATED_128BIT_MATH */ + +static inline void pcg_setseq_128_step_r(pcg_state_setseq_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} + +static inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} + +static inline void pcg_setseq_128_srandom_r(pcg_state_setseq_128 *rng, + pcg128_t initstate, + pcg128_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} + +#endif /* PCG_EMULATED_128BIT_MATH */ + +static inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} + +static inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); + +static inline void pcg_setseq_128_advance_r(pcg_state_setseq_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} + +typedef pcg_state_setseq_128 pcg64_random_t; +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER + +#if __cplusplus +} +#endif + +typedef struct s_pcg64_state { + pcg64_random_t *pcg_state; + int has_uint32; + uint32_t uinteger; +} pcg64_state; + +static inline uint64_t pcg64_next64(pcg64_state *state) { + return pcg64_random_r(state->pcg_state); +} + +static inline uint32_t pcg64_next32(pcg64_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = pcg64_random_r(state->pcg_state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void pcg64_advance(pcg64_state *state, uint64_t *step); + +void pcg64_set_seed(pcg64_state *state, uint64_t *seed, uint64_t *inc); + +#endif /* PCG64_H_INCLUDED */ diff --git a/numpy/random/src/pcg64/pcg64.orig.c b/numpy/random/src/pcg64/pcg64.orig.c new file mode 100644 index 000000000..07e97e4b6 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.orig.c @@ -0,0 +1,11 @@ +#include "pcg64.orig.h" + +extern inline void pcg_setseq_128_srandom_r(pcg64_random_t *rng, + pcg128_t initstate, + pcg128_t initseq); + +extern uint64_t pcg_rotr_64(uint64_t value, unsigned int rot); +extern inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state); +extern void pcg_setseq_128_step_r(struct pcg_state_setseq_128 *rng); +extern uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128 *rng); diff --git a/numpy/random/src/pcg64/pcg64.orig.h b/numpy/random/src/pcg64/pcg64.orig.h new file mode 100644 index 000000000..74be91f31 --- /dev/null +++ b/numpy/random/src/pcg64/pcg64.orig.h @@ -0,0 +1,2025 @@ +/* + * PCG Random Number Generation for C. + * + * Copyright 2014 Melissa O'Neill <oneill@pcg-random.org> + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * For additional information about the PCG random number generation scheme, + * including its license and other licensing options, visit + * + * http://www.pcg-random.org + */ + +/* + * This code is derived from the canonical C++ PCG implementation, which + * has many additional features and is preferable if you can use C++ in + * your project. + * + * Much of the derivation was performed mechanically. In particular, the + * output functions were generated by compiling the C++ output functions + * into LLVM bitcode and then transforming that using the LLVM C backend + * (from https://github.com/draperlaboratory/llvm-cbe), and then + * postprocessing and hand editing the output. + * + * Much of the remaining code was generated by C-preprocessor metaprogramming. + */ + +#ifndef PCG_VARIANTS_H_INCLUDED +#define PCG_VARIANTS_H_INCLUDED 1 + +#include <inttypes.h> + +#if __SIZEOF_INT128__ +typedef __uint128_t pcg128_t; +#define PCG_128BIT_CONSTANT(high, low) ((((pcg128_t)high) << 64) + low) +#define PCG_HAS_128BIT_OPS 1 +#endif + +#if __GNUC_GNU_INLINE__ && !defined(__cplusplus) +#error Nonstandard GNU inlining semantics. Compile with -std=c99 or better. +// We could instead use macros PCG_INLINE and PCG_EXTERN_INLINE +// but better to just reject ancient C code. +#endif + +#if __cplusplus +extern "C" { +#endif + +/* + * Rotate helper functions. + */ + +inline uint8_t pcg_rotr_8(uint8_t value, unsigned int rot) { +/* Unfortunately, clang is kinda pathetic when it comes to properly + * recognizing idiomatic rotate code, so for clang we actually provide + * assembler directives (enabled with PCG_USE_INLINE_ASM). Boo, hiss. + */ +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorb %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 7)); +#endif +} + +inline uint16_t pcg_rotr_16(uint16_t value, unsigned int rot) { +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorw %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 15)); +#endif +} + +inline uint32_t pcg_rotr_32(uint32_t value, unsigned int rot) { +#if PCG_USE_INLINE_ASM && __clang__ && (__x86_64__ || __i386__) + asm("rorl %%cl, %0" : "=r"(value) : "0"(value), "c"(rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 31)); +#endif +} + +inline uint64_t pcg_rotr_64(uint64_t value, unsigned int rot) { +#if 0 && PCG_USE_INLINE_ASM && __clang__ && __x86_64__ + // For whatever reason, clang actually *does* generate rotq by + // itself, so we don't need this code. + asm ("rorq %%cl, %0" : "=r" (value) : "0" (value), "c" (rot)); + return value; +#else + return (value >> rot) | (value << ((-rot) & 63)); +#endif +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_rotr_128(pcg128_t value, unsigned int rot) { + return (value >> rot) | (value << ((-rot) & 127)); +} +#endif + +/* + * Output functions. These are the core of the PCG generation scheme. + */ + +// XSH RS + +inline uint8_t pcg_output_xsh_rs_16_8(uint16_t state) { + return (uint8_t)(((state >> 7u) ^ state) >> ((state >> 14u) + 3u)); +} + +inline uint16_t pcg_output_xsh_rs_32_16(uint32_t state) { + return (uint16_t)(((state >> 11u) ^ state) >> ((state >> 30u) + 11u)); +} + +inline uint32_t pcg_output_xsh_rs_64_32(uint64_t state) { + + return (uint32_t)(((state >> 22u) ^ state) >> ((state >> 61u) + 22u)); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rs_128_64(pcg128_t state) { + return (uint64_t)(((state >> 43u) ^ state) >> ((state >> 124u) + 45u)); +} +#endif + +// XSH RR + +inline uint8_t pcg_output_xsh_rr_16_8(uint16_t state) { + return pcg_rotr_8(((state >> 5u) ^ state) >> 5u, state >> 13u); +} + +inline uint16_t pcg_output_xsh_rr_32_16(uint32_t state) { + return pcg_rotr_16(((state >> 10u) ^ state) >> 12u, state >> 28u); +} + +inline uint32_t pcg_output_xsh_rr_64_32(uint64_t state) { + return pcg_rotr_32(((state >> 18u) ^ state) >> 27u, state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsh_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((state >> 29u) ^ state) >> 58u, state >> 122u); +} +#endif + +// RXS M XS + +inline uint8_t pcg_output_rxs_m_xs_8_8(uint8_t state) { + uint8_t word = ((state >> ((state >> 6u) + 2u)) ^ state) * 217u; + return (word >> 6u) ^ word; +} + +inline uint16_t pcg_output_rxs_m_xs_16_16(uint16_t state) { + uint16_t word = ((state >> ((state >> 13u) + 3u)) ^ state) * 62169u; + return (word >> 11u) ^ word; +} + +inline uint32_t pcg_output_rxs_m_xs_32_32(uint32_t state) { + uint32_t word = ((state >> ((state >> 28u) + 4u)) ^ state) * 277803737u; + return (word >> 22u) ^ word; +} + +inline uint64_t pcg_output_rxs_m_xs_64_64(uint64_t state) { + uint64_t word = + ((state >> ((state >> 59u) + 5u)) ^ state) * 12605985483714917081ull; + return (word >> 43u) ^ word; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_rxs_m_xs_128_128(pcg128_t state) { + pcg128_t word = + ((state >> ((state >> 122u) + 6u)) ^ state) * + (PCG_128BIT_CONSTANT(17766728186571221404ULL, 12605985483714917081ULL)); + // 327738287884841127335028083622016905945 + return (word >> 86u) ^ word; +} +#endif + +// XSL RR (only defined for >= 64 bits) + +inline uint32_t pcg_output_xsl_rr_64_32(uint64_t state) { + return pcg_rotr_32(((uint32_t)(state >> 32u)) ^ (uint32_t)state, + state >> 59u); +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_output_xsl_rr_128_64(pcg128_t state) { + return pcg_rotr_64(((uint64_t)(state >> 64u)) ^ (uint64_t)state, + state >> 122u); +} +#endif + +// XSL RR RR (only defined for >= 64 bits) + +inline uint64_t pcg_output_xsl_rr_rr_64_64(uint64_t state) { + uint32_t rot1 = (uint32_t)(state >> 59u); + uint32_t high = (uint32_t)(state >> 32u); + uint32_t low = (uint32_t)state; + uint32_t xored = high ^ low; + uint32_t newlow = pcg_rotr_32(xored, rot1); + uint32_t newhigh = pcg_rotr_32(high, newlow & 31u); + return (((uint64_t)newhigh) << 32u) | newlow; +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t pcg_output_xsl_rr_rr_128_128(pcg128_t state) { + uint32_t rot1 = (uint32_t)(state >> 122u); + uint64_t high = (uint64_t)(state >> 64u); + uint64_t low = (uint64_t)state; + uint64_t xored = high ^ low; + uint64_t newlow = pcg_rotr_64(xored, rot1); + uint64_t newhigh = pcg_rotr_64(high, newlow & 63u); + return (((pcg128_t)newhigh) << 64u) | newlow; +} +#endif + +#define PCG_DEFAULT_MULTIPLIER_8 141U +#define PCG_DEFAULT_MULTIPLIER_16 12829U +#define PCG_DEFAULT_MULTIPLIER_32 747796405U +#define PCG_DEFAULT_MULTIPLIER_64 6364136223846793005ULL + +#define PCG_DEFAULT_INCREMENT_8 77U +#define PCG_DEFAULT_INCREMENT_16 47989U +#define PCG_DEFAULT_INCREMENT_32 2891336453U +#define PCG_DEFAULT_INCREMENT_64 1442695040888963407ULL + +#if PCG_HAS_128BIT_OPS +#define PCG_DEFAULT_MULTIPLIER_128 \ + PCG_128BIT_CONSTANT(2549297995355413924ULL, 4865540595714422341ULL) +#define PCG_DEFAULT_INCREMENT_128 \ + PCG_128BIT_CONSTANT(6364136223846793005ULL, 1442695040888963407ULL) +#endif + + /* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG_STATE_ONESEQ_8_INITIALIZER \ + { 0xd7U } +#define PCG_STATE_ONESEQ_16_INITIALIZER \ + { 0x20dfU } +#define PCG_STATE_ONESEQ_32_INITIALIZER \ + { 0x46b56677U } +#define PCG_STATE_ONESEQ_64_INITIALIZER \ + { 0x4d595df4d0f33173ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_ONESEQ_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0xb8dc10e158a92392ULL, 0x98046df007ec0a53ULL) } +#endif + +#define PCG_STATE_UNIQUE_8_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG_STATE_UNIQUE_16_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG_STATE_UNIQUE_32_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG_STATE_UNIQUE_64_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_UNIQUE_128_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG_STATE_MCG_8_INITIALIZER \ + { 0xe5U } +#define PCG_STATE_MCG_16_INITIALIZER \ + { 0xa5e5U } +#define PCG_STATE_MCG_32_INITIALIZER \ + { 0xd15ea5e5U } +#define PCG_STATE_MCG_64_INITIALIZER \ + { 0xcafef00dd15ea5e5ULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_MCG_128_INITIALIZER \ + { PCG_128BIT_CONSTANT(0x0000000000000000ULL, 0xcafef00dd15ea5e5ULL) } +#endif + +#define PCG_STATE_SETSEQ_8_INITIALIZER \ + { 0x9bU, 0xdbU } +#define PCG_STATE_SETSEQ_16_INITIALIZER \ + { 0xe39bU, 0x5bdbU } +#define PCG_STATE_SETSEQ_32_INITIALIZER \ + { 0xec02d89bU, 0x94b95bdbU } +#define PCG_STATE_SETSEQ_64_INITIALIZER \ + { 0x853c49e6748fea9bULL, 0xda3e39cb94b95bdbULL } +#if PCG_HAS_128BIT_OPS +#define PCG_STATE_SETSEQ_128_INITIALIZER \ + { \ + PCG_128BIT_CONSTANT(0x979c9a98d8462005ULL, 0x7d3e9cb6cfe0549bULL) \ + , PCG_128BIT_CONSTANT(0x0000000000000001ULL, 0xda3e39cb94b95bdbULL) \ + } +#endif + +/* Representations for the oneseq, mcg, and unique variants */ + +struct pcg_state_8 { + uint8_t state; +}; + +struct pcg_state_16 { + uint16_t state; +}; + +struct pcg_state_32 { + uint32_t state; +}; + +struct pcg_state_64 { + uint64_t state; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_128 { + pcg128_t state; +}; +#endif + +/* Representations setseq variants */ + +struct pcg_state_setseq_8 { + uint8_t state; + uint8_t inc; +}; + +struct pcg_state_setseq_16 { + uint16_t state; + uint16_t inc; +}; + +struct pcg_state_setseq_32 { + uint32_t state; + uint32_t inc; +}; + +struct pcg_state_setseq_64 { + uint64_t state; + uint64_t inc; +}; + +#if PCG_HAS_128BIT_OPS +struct pcg_state_setseq_128 { + pcg128_t state; + pcg128_t inc; +}; +#endif + +/* Multi-step advance functions (jump-ahead, jump-back) */ + +extern uint8_t pcg_advance_lcg_8(uint8_t state, uint8_t delta, uint8_t cur_mult, + uint8_t cur_plus); +extern uint16_t pcg_advance_lcg_16(uint16_t state, uint16_t delta, + uint16_t cur_mult, uint16_t cur_plus); +extern uint32_t pcg_advance_lcg_32(uint32_t state, uint32_t delta, + uint32_t cur_mult, uint32_t cur_plus); +extern uint64_t pcg_advance_lcg_64(uint64_t state, uint64_t delta, + uint64_t cur_mult, uint64_t cur_plus); + +#if PCG_HAS_128BIT_OPS +extern pcg128_t pcg_advance_lcg_128(pcg128_t state, pcg128_t delta, + pcg128_t cur_mult, pcg128_t cur_plus); +#endif + +/* Functions to advance the underlying LCG, one version for each size and + * each style. These functions are considered semi-private. There is rarely + * a good reason to call them directly. + */ + +inline void pcg_oneseq_8_step_r(struct pcg_state_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + PCG_DEFAULT_INCREMENT_8; +} + +inline void pcg_oneseq_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + PCG_DEFAULT_INCREMENT_8); +} + +inline void pcg_mcg_8_step_r(struct pcg_state_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8; +} + +inline void pcg_mcg_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = + pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, 0u); +} + +inline void pcg_unique_8_step_r(struct pcg_state_8 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_8 + (uint8_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_8_advance_r(struct pcg_state_8 *rng, uint8_t delta) { + rng->state = pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, + (uint8_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_8_step_r(struct pcg_state_setseq_8 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_8 + rng->inc; +} + +inline void pcg_setseq_8_advance_r(struct pcg_state_setseq_8 *rng, + uint8_t delta) { + rng->state = + pcg_advance_lcg_8(rng->state, delta, PCG_DEFAULT_MULTIPLIER_8, rng->inc); +} + +inline void pcg_oneseq_16_step_r(struct pcg_state_16 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_16 + PCG_DEFAULT_INCREMENT_16; +} + +inline void pcg_oneseq_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + PCG_DEFAULT_INCREMENT_16); +} + +inline void pcg_mcg_16_step_r(struct pcg_state_16 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16; +} + +inline void pcg_mcg_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = + pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, 0u); +} + +inline void pcg_unique_16_step_r(struct pcg_state_16 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_16 + (uint16_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_16_advance_r(struct pcg_state_16 *rng, uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + (uint16_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_16_step_r(struct pcg_state_setseq_16 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_16 + rng->inc; +} + +inline void pcg_setseq_16_advance_r(struct pcg_state_setseq_16 *rng, + uint16_t delta) { + rng->state = pcg_advance_lcg_16(rng->state, delta, PCG_DEFAULT_MULTIPLIER_16, + rng->inc); +} + +inline void pcg_oneseq_32_step_r(struct pcg_state_32 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_32 + PCG_DEFAULT_INCREMENT_32; +} + +inline void pcg_oneseq_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + PCG_DEFAULT_INCREMENT_32); +} + +inline void pcg_mcg_32_step_r(struct pcg_state_32 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32; +} + +inline void pcg_mcg_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = + pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, 0u); +} + +inline void pcg_unique_32_step_r(struct pcg_state_32 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_32 + (uint32_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_32_advance_r(struct pcg_state_32 *rng, uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + (uint32_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_32_step_r(struct pcg_state_setseq_32 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_32 + rng->inc; +} + +inline void pcg_setseq_32_advance_r(struct pcg_state_setseq_32 *rng, + uint32_t delta) { + rng->state = pcg_advance_lcg_32(rng->state, delta, PCG_DEFAULT_MULTIPLIER_32, + rng->inc); +} + +inline void pcg_oneseq_64_step_r(struct pcg_state_64 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_64 + PCG_DEFAULT_INCREMENT_64; +} + +inline void pcg_oneseq_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + PCG_DEFAULT_INCREMENT_64); +} + +inline void pcg_mcg_64_step_r(struct pcg_state_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64; +} + +inline void pcg_mcg_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = + pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, 0u); +} + +inline void pcg_unique_64_step_r(struct pcg_state_64 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_64 + (uint64_t)(((intptr_t)rng) | 1u); +} + +inline void pcg_unique_64_advance_r(struct pcg_state_64 *rng, uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + (uint64_t)(((intptr_t)rng) | 1u)); +} + +inline void pcg_setseq_64_step_r(struct pcg_state_setseq_64 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_64 + rng->inc; +} + +inline void pcg_setseq_64_advance_r(struct pcg_state_setseq_64 *rng, + uint64_t delta) { + rng->state = pcg_advance_lcg_64(rng->state, delta, PCG_DEFAULT_MULTIPLIER_64, + rng->inc); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_step_r(struct pcg_state_128 *rng) { + rng->state = + rng->state * PCG_DEFAULT_MULTIPLIER_128 + PCG_DEFAULT_INCREMENT_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_advance_r(struct pcg_state_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128( + rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, PCG_DEFAULT_INCREMENT_128); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_step_r(struct pcg_state_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_advance_r(struct pcg_state_128 *rng, pcg128_t delta) { + rng->state = + pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, 0u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_step_r(struct pcg_state_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + + (pcg128_t)(((intptr_t)rng) | 1u); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_advance_r(struct pcg_state_128 *rng, + pcg128_t delta) { + rng->state = + pcg_advance_lcg_128(rng->state, delta, PCG_DEFAULT_MULTIPLIER_128, + (pcg128_t)(((intptr_t)rng) | 1u)); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_step_r(struct pcg_state_setseq_128 *rng) { + rng->state = rng->state * PCG_DEFAULT_MULTIPLIER_128 + rng->inc; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_advance_r(struct pcg_state_setseq_128 *rng, + pcg128_t delta) { + rng->state = pcg_advance_lcg_128(rng->state, delta, + PCG_DEFAULT_MULTIPLIER_128, rng->inc); +} +#endif + +/* Functions to seed the RNG state, one version for each size and each + * style. Unlike the step functions, regular users can and should call + * these functions. + */ + +inline void pcg_oneseq_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = 0U; + pcg_oneseq_8_step_r(rng); + rng->state += initstate; + pcg_oneseq_8_step_r(rng); +} + +inline void pcg_mcg_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_8_srandom_r(struct pcg_state_8 *rng, uint8_t initstate) { + rng->state = 0U; + pcg_unique_8_step_r(rng); + rng->state += initstate; + pcg_unique_8_step_r(rng); +} + +inline void pcg_setseq_8_srandom_r(struct pcg_state_setseq_8 *rng, + uint8_t initstate, uint8_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_8_step_r(rng); + rng->state += initstate; + pcg_setseq_8_step_r(rng); +} + +inline void pcg_oneseq_16_srandom_r(struct pcg_state_16 *rng, + uint16_t initstate) { + rng->state = 0U; + pcg_oneseq_16_step_r(rng); + rng->state += initstate; + pcg_oneseq_16_step_r(rng); +} + +inline void pcg_mcg_16_srandom_r(struct pcg_state_16 *rng, uint16_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_16_srandom_r(struct pcg_state_16 *rng, + uint16_t initstate) { + rng->state = 0U; + pcg_unique_16_step_r(rng); + rng->state += initstate; + pcg_unique_16_step_r(rng); +} + +inline void pcg_setseq_16_srandom_r(struct pcg_state_setseq_16 *rng, + uint16_t initstate, uint16_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_16_step_r(rng); + rng->state += initstate; + pcg_setseq_16_step_r(rng); +} + +inline void pcg_oneseq_32_srandom_r(struct pcg_state_32 *rng, + uint32_t initstate) { + rng->state = 0U; + pcg_oneseq_32_step_r(rng); + rng->state += initstate; + pcg_oneseq_32_step_r(rng); +} + +inline void pcg_mcg_32_srandom_r(struct pcg_state_32 *rng, uint32_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_32_srandom_r(struct pcg_state_32 *rng, + uint32_t initstate) { + rng->state = 0U; + pcg_unique_32_step_r(rng); + rng->state += initstate; + pcg_unique_32_step_r(rng); +} + +inline void pcg_setseq_32_srandom_r(struct pcg_state_setseq_32 *rng, + uint32_t initstate, uint32_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_32_step_r(rng); + rng->state += initstate; + pcg_setseq_32_step_r(rng); +} + +inline void pcg_oneseq_64_srandom_r(struct pcg_state_64 *rng, + uint64_t initstate) { + rng->state = 0U; + pcg_oneseq_64_step_r(rng); + rng->state += initstate; + pcg_oneseq_64_step_r(rng); +} + +inline void pcg_mcg_64_srandom_r(struct pcg_state_64 *rng, uint64_t initstate) { + rng->state = initstate | 1u; +} + +inline void pcg_unique_64_srandom_r(struct pcg_state_64 *rng, + uint64_t initstate) { + rng->state = 0U; + pcg_unique_64_step_r(rng); + rng->state += initstate; + pcg_unique_64_step_r(rng); +} + +inline void pcg_setseq_64_srandom_r(struct pcg_state_setseq_64 *rng, + uint64_t initstate, uint64_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_64_step_r(rng); + rng->state += initstate; + pcg_setseq_64_step_r(rng); +} + +#if PCG_HAS_128BIT_OPS +inline void pcg_oneseq_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = 0U; + pcg_oneseq_128_step_r(rng); + rng->state += initstate; + pcg_oneseq_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_mcg_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = initstate | 1u; +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_unique_128_srandom_r(struct pcg_state_128 *rng, + pcg128_t initstate) { + rng->state = 0U; + pcg_unique_128_step_r(rng); + rng->state += initstate; + pcg_unique_128_step_r(rng); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline void pcg_setseq_128_srandom_r(struct pcg_state_setseq_128 *rng, + pcg128_t initstate, pcg128_t initseq) { + rng->state = 0U; + rng->inc = (initseq << 1u) | 1u; + pcg_setseq_128_step_r(rng); + rng->state += initstate; + pcg_setseq_128_step_r(rng); +} +#endif + +/* Now, finally we create each of the individual generators. We provide + * a random_r function that provides a random number of the appropriate + * type (using the full range of the type) and a boundedrand_r version + * that provides + * + * Implementation notes for boundedrand_r: + * + * To avoid bias, we need to make the range of the RNG a multiple of + * bound, which we do by dropping output less than a threshold. + * Let's consider a 32-bit case... A naive scheme to calculate the + * threshold would be to do + * + * uint32_t threshold = 0x100000000ull % bound; + * + * but 64-bit div/mod is slower than 32-bit div/mod (especially on + * 32-bit platforms). In essence, we do + * + * uint32_t threshold = (0x100000000ull-bound) % bound; + * + * because this version will calculate the same modulus, but the LHS + * value is less than 2^32. + * + * (Note that using modulo is only wise for good RNGs, poorer RNGs + * such as raw LCGs do better using a technique based on division.) + * Empricical tests show that division is preferable to modulus for + * reducting the range of an RNG. It's faster, and sometimes it can + * even be statistically prefereable. + */ + +/* Generation functions for XSH RS */ + +inline uint8_t pcg_oneseq_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t +pcg_setseq_16_xsh_rs_8_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rs_8_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rs_16_boundedrand_r(struct pcg_state_setseq_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rs_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rs_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rs_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rs_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rs_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rs_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rs_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rs_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rs_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rs_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rs_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSH RR */ + +inline uint8_t pcg_oneseq_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_oneseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_oneseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_unique_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_unique_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_unique_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_unique_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_unique_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t +pcg_setseq_16_xsh_rr_8_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t +pcg_setseq_16_xsh_rr_8_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t +pcg_setseq_32_xsh_rr_16_boundedrand_r(struct pcg_state_setseq_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsh_rr_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsh_rr_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t pcg_mcg_16_xsh_rr_8_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_mcg_16_step_r(rng); + return pcg_output_xsh_rr_16_8(oldstate); +} + +inline uint8_t pcg_mcg_16_xsh_rr_8_boundedrand_r(struct pcg_state_16 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_mcg_16_xsh_rr_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_mcg_32_step_r(rng); + return pcg_output_xsh_rr_32_16(oldstate); +} + +inline uint16_t pcg_mcg_32_xsh_rr_16_boundedrand_r(struct pcg_state_32 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_mcg_32_xsh_rr_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsh_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsh_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsh_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsh_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsh_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsh_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for RXS M XS (no MCG versions because they + * don't make sense when you want to use the entire state) + */ + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_random_r(struct pcg_state_8 *rng) { + uint8_t oldstate = rng->state; + pcg_oneseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t pcg_oneseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_8 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_oneseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t pcg_oneseq_16_rxs_m_xs_16_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_oneseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_oneseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_oneseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_oneseq_32_rxs_m_xs_32_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_oneseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_oneseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_oneseq_64_rxs_m_xs_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint16_t pcg_unique_16_rxs_m_xs_16_random_r(struct pcg_state_16 *rng) { + uint16_t oldstate = rng->state; + pcg_unique_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_unique_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_unique_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t pcg_unique_32_rxs_m_xs_32_random_r(struct pcg_state_32 *rng) { + uint32_t oldstate = rng->state; + pcg_unique_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_unique_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t pcg_unique_64_rxs_m_xs_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_random_r(struct pcg_state_setseq_8 *rng) { + uint8_t oldstate = rng->state; + pcg_setseq_8_step_r(rng); + return pcg_output_rxs_m_xs_8_8(oldstate); +} + +inline uint8_t +pcg_setseq_8_rxs_m_xs_8_boundedrand_r(struct pcg_state_setseq_8 *rng, + uint8_t bound) { + uint8_t threshold = ((uint8_t)(-bound)) % bound; + for (;;) { + uint8_t r = pcg_setseq_8_rxs_m_xs_8_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_random_r(struct pcg_state_setseq_16 *rng) { + uint16_t oldstate = rng->state; + pcg_setseq_16_step_r(rng); + return pcg_output_rxs_m_xs_16_16(oldstate); +} + +inline uint16_t +pcg_setseq_16_rxs_m_xs_16_boundedrand_r(struct pcg_state_setseq_16 *rng, + uint16_t bound) { + uint16_t threshold = ((uint16_t)(-bound)) % bound; + for (;;) { + uint16_t r = pcg_setseq_16_rxs_m_xs_16_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_random_r(struct pcg_state_setseq_32 *rng) { + uint32_t oldstate = rng->state; + pcg_setseq_32_step_r(rng); + return pcg_output_rxs_m_xs_32_32(oldstate); +} + +inline uint32_t +pcg_setseq_32_rxs_m_xs_32_boundedrand_r(struct pcg_state_setseq_32 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_32_rxs_m_xs_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_rxs_m_xs_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_rxs_m_xs_64_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_rxs_m_xs_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_rxs_m_xs_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_rxs_m_xs_128_boundedrand_r(struct pcg_state_setseq_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_rxs_m_xs_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR (only defined for "large" types) */ + +inline uint32_t pcg_oneseq_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_oneseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_oneseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_oneseq_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_oneseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_unique_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_unique_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_unique_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_unique_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_unique_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t +pcg_setseq_64_xsl_rr_32_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t +pcg_setseq_64_xsl_rr_32_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_setseq_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t +pcg_setseq_128_xsl_rr_64_boundedrand_r(struct pcg_state_setseq_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint32_t pcg_mcg_64_xsl_rr_32_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_mcg_64_step_r(rng); + return pcg_output_xsl_rr_64_32(oldstate); +} + +inline uint32_t pcg_mcg_64_xsl_rr_32_boundedrand_r(struct pcg_state_64 *rng, + uint32_t bound) { + uint32_t threshold = -bound % bound; + for (;;) { + uint32_t r = pcg_mcg_64_xsl_rr_32_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_random_r(struct pcg_state_128 *rng) { + pcg_mcg_128_step_r(rng); + return pcg_output_xsl_rr_128_64(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline uint64_t pcg_mcg_128_xsl_rr_64_boundedrand_r(struct pcg_state_128 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_mcg_128_xsl_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +/* Generation functions for XSL RR RR (only defined for "large" types) */ + +inline uint64_t pcg_oneseq_64_xsl_rr_rr_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_oneseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_oneseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_oneseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_random_r(struct pcg_state_128 *rng) { + pcg_oneseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_oneseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_oneseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t pcg_unique_64_xsl_rr_rr_64_random_r(struct pcg_state_64 *rng) { + uint64_t oldstate = rng->state; + pcg_unique_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_unique_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_unique_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_random_r(struct pcg_state_128 *rng) { + pcg_unique_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_unique_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_unique_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_random_r(struct pcg_state_setseq_64 *rng) { + uint64_t oldstate = rng->state; + pcg_setseq_64_step_r(rng); + return pcg_output_xsl_rr_rr_64_64(oldstate); +} + +inline uint64_t +pcg_setseq_64_xsl_rr_rr_64_boundedrand_r(struct pcg_state_setseq_64 *rng, + uint64_t bound) { + uint64_t threshold = -bound % bound; + for (;;) { + uint64_t r = pcg_setseq_64_xsl_rr_rr_64_random_r(rng); + if (r >= threshold) + return r % bound; + } +} + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_random_r(struct pcg_state_setseq_128 *rng) { + pcg_setseq_128_step_r(rng); + return pcg_output_xsl_rr_rr_128_128(rng->state); +} +#endif + +#if PCG_HAS_128BIT_OPS +inline pcg128_t +pcg_setseq_128_xsl_rr_rr_128_boundedrand_r(struct pcg_state_setseq_128 *rng, + pcg128_t bound) { + pcg128_t threshold = -bound % bound; + for (;;) { + pcg128_t r = pcg_setseq_128_xsl_rr_rr_128_random_r(rng); + if (r >= threshold) + return r % bound; + } +} +#endif + +//// Typedefs +typedef struct pcg_state_setseq_64 pcg32_random_t; +typedef struct pcg_state_64 pcg32s_random_t; +typedef struct pcg_state_64 pcg32u_random_t; +typedef struct pcg_state_64 pcg32f_random_t; +//// random_r +#define pcg32_random_r pcg_setseq_64_xsh_rr_32_random_r +#define pcg32s_random_r pcg_oneseq_64_xsh_rr_32_random_r +#define pcg32u_random_r pcg_unique_64_xsh_rr_32_random_r +#define pcg32f_random_r pcg_mcg_64_xsh_rs_32_random_r +//// boundedrand_r +#define pcg32_boundedrand_r pcg_setseq_64_xsh_rr_32_boundedrand_r +#define pcg32s_boundedrand_r pcg_oneseq_64_xsh_rr_32_boundedrand_r +#define pcg32u_boundedrand_r pcg_unique_64_xsh_rr_32_boundedrand_r +#define pcg32f_boundedrand_r pcg_mcg_64_xsh_rs_32_boundedrand_r +//// srandom_r +#define pcg32_srandom_r pcg_setseq_64_srandom_r +#define pcg32s_srandom_r pcg_oneseq_64_srandom_r +#define pcg32u_srandom_r pcg_unique_64_srandom_r +#define pcg32f_srandom_r pcg_mcg_64_srandom_r +//// advance_r +#define pcg32_advance_r pcg_setseq_64_advance_r +#define pcg32s_advance_r pcg_oneseq_64_advance_r +#define pcg32u_advance_r pcg_unique_64_advance_r +#define pcg32f_advance_r pcg_mcg_64_advance_r + +#if PCG_HAS_128BIT_OPS +//// Typedefs +typedef struct pcg_state_setseq_128 pcg64_random_t; +typedef struct pcg_state_128 pcg64s_random_t; +typedef struct pcg_state_128 pcg64u_random_t; +typedef struct pcg_state_128 pcg64f_random_t; +//// random_r +#define pcg64_random_r pcg_setseq_128_xsl_rr_64_random_r +#define pcg64s_random_r pcg_oneseq_128_xsl_rr_64_random_r +#define pcg64u_random_r pcg_unique_128_xsl_rr_64_random_r +#define pcg64f_random_r pcg_mcg_128_xsl_rr_64_random_r +//// boundedrand_r +#define pcg64_boundedrand_r pcg_setseq_128_xsl_rr_64_boundedrand_r +#define pcg64s_boundedrand_r pcg_oneseq_128_xsl_rr_64_boundedrand_r +#define pcg64u_boundedrand_r pcg_unique_128_xsl_rr_64_boundedrand_r +#define pcg64f_boundedrand_r pcg_mcg_128_xsl_rr_64_boundedrand_r +//// srandom_r +#define pcg64_srandom_r pcg_setseq_128_srandom_r +#define pcg64s_srandom_r pcg_oneseq_128_srandom_r +#define pcg64u_srandom_r pcg_unique_128_srandom_r +#define pcg64f_srandom_r pcg_mcg_128_srandom_r +//// advance_r +#define pcg64_advance_r pcg_setseq_128_advance_r +#define pcg64s_advance_r pcg_oneseq_128_advance_r +#define pcg64u_advance_r pcg_unique_128_advance_r +#define pcg64f_advance_r pcg_mcg_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_8 pcg8si_random_t; +typedef struct pcg_state_16 pcg16si_random_t; +typedef struct pcg_state_32 pcg32si_random_t; +typedef struct pcg_state_64 pcg64si_random_t; +//// random_r +#define pcg8si_random_r pcg_oneseq_8_rxs_m_xs_8_random_r +#define pcg16si_random_r pcg_oneseq_16_rxs_m_xs_16_random_r +#define pcg32si_random_r pcg_oneseq_32_rxs_m_xs_32_random_r +#define pcg64si_random_r pcg_oneseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8si_boundedrand_r pcg_oneseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16si_boundedrand_r pcg_oneseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32si_boundedrand_r pcg_oneseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64si_boundedrand_r pcg_oneseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8si_srandom_r pcg_oneseq_8_srandom_r +#define pcg16si_srandom_r pcg_oneseq_16_srandom_r +#define pcg32si_srandom_r pcg_oneseq_32_srandom_r +#define pcg64si_srandom_r pcg_oneseq_64_srandom_r +//// advance_r +#define pcg8si_advance_r pcg_oneseq_8_advance_r +#define pcg16si_advance_r pcg_oneseq_16_advance_r +#define pcg32si_advance_r pcg_oneseq_32_advance_r +#define pcg64si_advance_r pcg_oneseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_128 pcg128si_random_t; +#define pcg128si_random_r pcg_oneseq_128_rxs_m_xs_128_random_r +#define pcg128si_boundedrand_r pcg_oneseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128si_srandom_r pcg_oneseq_128_srandom_r +#define pcg128si_advance_r pcg_oneseq_128_advance_r +#endif + +//// Typedefs +typedef struct pcg_state_setseq_8 pcg8i_random_t; +typedef struct pcg_state_setseq_16 pcg16i_random_t; +typedef struct pcg_state_setseq_32 pcg32i_random_t; +typedef struct pcg_state_setseq_64 pcg64i_random_t; +//// random_r +#define pcg8i_random_r pcg_setseq_8_rxs_m_xs_8_random_r +#define pcg16i_random_r pcg_setseq_16_rxs_m_xs_16_random_r +#define pcg32i_random_r pcg_setseq_32_rxs_m_xs_32_random_r +#define pcg64i_random_r pcg_setseq_64_rxs_m_xs_64_random_r +//// boundedrand_r +#define pcg8i_boundedrand_r pcg_setseq_8_rxs_m_xs_8_boundedrand_r +#define pcg16i_boundedrand_r pcg_setseq_16_rxs_m_xs_16_boundedrand_r +#define pcg32i_boundedrand_r pcg_setseq_32_rxs_m_xs_32_boundedrand_r +#define pcg64i_boundedrand_r pcg_setseq_64_rxs_m_xs_64_boundedrand_r +//// srandom_r +#define pcg8i_srandom_r pcg_setseq_8_srandom_r +#define pcg16i_srandom_r pcg_setseq_16_srandom_r +#define pcg32i_srandom_r pcg_setseq_32_srandom_r +#define pcg64i_srandom_r pcg_setseq_64_srandom_r +//// advance_r +#define pcg8i_advance_r pcg_setseq_8_advance_r +#define pcg16i_advance_r pcg_setseq_16_advance_r +#define pcg32i_advance_r pcg_setseq_32_advance_r +#define pcg64i_advance_r pcg_setseq_64_advance_r + +#if PCG_HAS_128BIT_OPS +typedef struct pcg_state_setseq_128 pcg128i_random_t; +#define pcg128i_random_r pcg_setseq_128_rxs_m_xs_128_random_r +#define pcg128i_boundedrand_r pcg_setseq_128_rxs_m_xs_128_boundedrand_r +#define pcg128i_srandom_r pcg_setseq_128_srandom_r +#define pcg128i_advance_r pcg_setseq_128_advance_r +#endif + +extern uint32_t pcg32_random(); +extern uint32_t pcg32_boundedrand(uint32_t bound); +extern void pcg32_srandom(uint64_t seed, uint64_t seq); +extern void pcg32_advance(uint64_t delta); + +#if PCG_HAS_128BIT_OPS +extern uint64_t pcg64_random(); +extern uint64_t pcg64_boundedrand(uint64_t bound); +extern void pcg64_srandom(pcg128_t seed, pcg128_t seq); +extern void pcg64_advance(pcg128_t delta); +#endif + +/* + * Static initialization constants (if you can't call srandom for some + * bizarre reason). + */ + +#define PCG32_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#define PCG32U_INITIALIZER PCG_STATE_UNIQUE_64_INITIALIZER +#define PCG32S_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#define PCG32F_INITIALIZER PCG_STATE_MCG_64_INITIALIZER + +#if PCG_HAS_128BIT_OPS +#define PCG64_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#define PCG64U_INITIALIZER PCG_STATE_UNIQUE_128_INITIALIZER +#define PCG64S_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#define PCG64F_INITIALIZER PCG_STATE_MCG_128_INITIALIZER +#endif + +#define PCG8SI_INITIALIZER PCG_STATE_ONESEQ_8_INITIALIZER +#define PCG16SI_INITIALIZER PCG_STATE_ONESEQ_16_INITIALIZER +#define PCG32SI_INITIALIZER PCG_STATE_ONESEQ_32_INITIALIZER +#define PCG64SI_INITIALIZER PCG_STATE_ONESEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128SI_INITIALIZER PCG_STATE_ONESEQ_128_INITIALIZER +#endif + +#define PCG8I_INITIALIZER PCG_STATE_SETSEQ_8_INITIALIZER +#define PCG16I_INITIALIZER PCG_STATE_SETSEQ_16_INITIALIZER +#define PCG32I_INITIALIZER PCG_STATE_SETSEQ_32_INITIALIZER +#define PCG64I_INITIALIZER PCG_STATE_SETSEQ_64_INITIALIZER +#if PCG_HAS_128BIT_OPS +#define PCG128I_INITIALIZER PCG_STATE_SETSEQ_128_INITIALIZER +#endif + +#if __cplusplus +} +#endif + +#endif // PCG_VARIANTS_H_INCLUDED diff --git a/numpy/random/src/philox/LICENSE.md b/numpy/random/src/philox/LICENSE.md new file mode 100644 index 000000000..4a9f6bb29 --- /dev/null +++ b/numpy/random/src/philox/LICENSE.md @@ -0,0 +1,31 @@ +# THREEFRY + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/philox/philox-benchmark.c b/numpy/random/src/philox/philox-benchmark.c new file mode 100644 index 000000000..0cab04cf5 --- /dev/null +++ b/numpy/random/src/philox/philox-benchmark.c @@ -0,0 +1,38 @@ +/* + * Simple benchamrk command + * + * cl philox-benchmark.c /Ox + * + * gcc philox-benchmark.c -O3 -o philox-benchmark + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + */ +#include "Random123/philox.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { + philox4x64_ctr_t ctr = {{0, 0, 0, 0}}; + philox4x64_key_t key = {{0, 0xDEADBEAF}}; + philox4x64_ctr_t out; + uint64_t count = 0, sum = 0; + int i, j; + clock_t begin = clock(); + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + sum += out.v[j]; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/philox/philox-test-data-gen.c b/numpy/random/src/philox/philox-test-data-gen.c new file mode 100644 index 000000000..442e18b55 --- /dev/null +++ b/numpy/random/src/philox/philox-test-data-gen.c @@ -0,0 +1,82 @@ +/* + * Generate testing csv files + * + * cl philox-test-data-gen.c /Ox + * philox-test-data-gen.exe + * + * gcc philox-test-data-gen.c -o philox-test-data-gen + * ./philox-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/philox.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + philox4x64_ctr_t ctr = {{0, 0, 0, 0}}; + philox4x64_key_t key = {{0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + philox4x64_ctr_t out; + uint64_t store[N]; + state = seed; + int i, j; + for (i = 0; i < 2; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("philox-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; + state = seed = 0; + for (i = 0; i < 2; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = philox4x64_R(philox4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("philox-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/philox/philox.c b/numpy/random/src/philox/philox.c new file mode 100644 index 000000000..3382c60d6 --- /dev/null +++ b/numpy/random/src/philox/philox.c @@ -0,0 +1,29 @@ +#include "philox.h" + +extern INLINE uint64_t philox_next64(philox_state *state); + +extern INLINE uint32_t philox_next32(philox_state *state); + +extern void philox_jump(philox_state *state) { + /* Advances state as-if 2^128 draws were made */ + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } +} + +extern void philox_advance(uint64_t *step, philox_state *state) { + int i, carry = 0; + uint64_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { + carry = 1; + } + } +} diff --git a/numpy/random/src/philox/philox.h b/numpy/random/src/philox/philox.h new file mode 100644 index 000000000..411404b55 --- /dev/null +++ b/numpy/random/src/philox/philox.h @@ -0,0 +1,253 @@ +#ifndef _RANDOMDGEN__PHILOX_H_ +#define _RANDOMDGEN__PHILOX_H_ + +#include <inttypes.h> + +#ifdef _WIN32 +#define INLINE __inline __forceinline +#else +#define INLINE inline +#endif + +#define PHILOX_BUFFER_SIZE 4L + +struct r123array2x64 { + uint64_t v[2]; +}; +struct r123array4x64 { + uint64_t v[4]; +}; + +enum r123_enum_philox4x64 { philox4x64_rounds = 10 }; +typedef struct r123array4x64 philox4x64_ctr_t; +typedef struct r123array2x64 philox4x64_key_t; +typedef struct r123array2x64 philox4x64_ukey_t; + +static INLINE struct r123array2x64 +_philox4x64bumpkey(struct r123array2x64 key) { + key.v[0] += (0x9E3779B97F4A7C15ULL); + key.v[1] += (0xBB67AE8584CAA73BULL); + return key; +} + +#ifdef _WIN32 +#include <intrin.h> +/* TODO: This isn't correct for many platforms */ +#ifdef _WIN64 +#pragma intrinsic(_umul128) +#else +#pragma intrinsic(__emulu) +static INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) { + + uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid, + carry_bit; + a_lo = (uint32_t)a; + a_hi = a >> 32; + b_lo = (uint32_t)b; + b_hi = b >> 32; + + a_x_b_hi = __emulu(a_hi, b_hi); + a_x_b_mid = __emulu(a_hi, b_lo); + b_x_a_mid = __emulu(b_hi, a_lo); + a_x_b_lo = __emulu(a_lo, b_lo); + + carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid + + (a_x_b_lo >> 32)) >> + 32; + + *high = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32); +} +#endif +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + return _umul128(a, b, hip); +} +#else +#if __SIZEOF_INT128__ +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + __uint128_t product = ((__uint128_t)a) * ((__uint128_t)b); + *hip = product >> 64; + return (uint64_t)product; +} +#else +static INLINE uint64_t _umul128(uint64_t a, uint64_t b, uint64_t *high) { + + uint64_t a_lo, a_hi, b_lo, b_hi, a_x_b_hi, a_x_b_mid, a_x_b_lo, b_x_a_mid, + carry_bit; + a_lo = (uint32_t)a; + a_hi = a >> 32; + b_lo = (uint32_t)b; + b_hi = b >> 32; + + a_x_b_hi = a_hi * b_hi; + a_x_b_mid = a_hi * b_lo; + b_x_a_mid = b_hi * a_lo; + a_x_b_lo = a_lo * b_lo; + + carry_bit = ((uint64_t)(uint32_t)a_x_b_mid + (uint64_t)(uint32_t)b_x_a_mid + + (a_x_b_lo >> 32)) >> + 32; + + *high = a_x_b_hi + (a_x_b_mid >> 32) + (b_x_a_mid >> 32) + carry_bit; + + return a_x_b_lo + ((a_x_b_mid + b_x_a_mid) << 32); +} +static INLINE uint64_t mulhilo64(uint64_t a, uint64_t b, uint64_t *hip) { + return _umul128(a, b, hip); +} +#endif +#endif + +static INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr, + struct r123array2x64 key); + +static INLINE struct r123array4x64 _philox4x64round(struct r123array4x64 ctr, + struct r123array2x64 key) { + uint64_t hi0; + uint64_t hi1; + uint64_t lo0 = mulhilo64((0xD2E7470EE14C6C93ULL), ctr.v[0], &hi0); + uint64_t lo1 = mulhilo64((0xCA5A826395121157ULL), ctr.v[2], &hi1); + struct r123array4x64 out = { + {hi1 ^ ctr.v[1] ^ key.v[0], lo1, hi0 ^ ctr.v[3] ^ key.v[1], lo0}}; + return out; +} + +static INLINE philox4x64_key_t philox4x64keyinit(philox4x64_ukey_t uk) { + return uk; +} +static INLINE philox4x64_ctr_t philox4x64_R(unsigned int R, + philox4x64_ctr_t ctr, + philox4x64_key_t key); + +static INLINE philox4x64_ctr_t philox4x64_R(unsigned int R, + philox4x64_ctr_t ctr, + philox4x64_key_t key) { + if (R > 0) { + ctr = _philox4x64round(ctr, key); + } + if (R > 1) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 2) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 3) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 4) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 5) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 6) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 7) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 8) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 9) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 10) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 11) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 12) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 13) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 14) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + if (R > 15) { + key = _philox4x64bumpkey(key); + ctr = _philox4x64round(ctr, key); + } + return ctr; +} + +typedef struct s_philox_state { + philox4x64_ctr_t *ctr; + philox4x64_key_t *key; + int buffer_pos; + uint64_t buffer[PHILOX_BUFFER_SIZE]; + int has_uint32; + uint32_t uinteger; +} philox_state; + +static INLINE uint64_t philox_next(philox_state *state) { + uint64_t out; + int i; + philox4x64_ctr_t ct; + + if (state->buffer_pos < PHILOX_BUFFER_SIZE) { + out = state->buffer[state->buffer_pos]; + state->buffer_pos++; + return out; + } + /* generate 4 new uint64_t */ + state->ctr->v[0]++; + /* Handle carry */ + if (state->ctr->v[0] == 0) { + state->ctr->v[1]++; + if (state->ctr->v[1] == 0) { + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } + } + } + ct = philox4x64_R(philox4x64_rounds, *state->ctr, *state->key); + for (i = 0; i < 4; i++) { + state->buffer[i] = ct.v[i]; + } + state->buffer_pos = 1; + return state->buffer[0]; +} + +static INLINE uint64_t philox_next64(philox_state *state) { + return philox_next(state); +} + +static INLINE uint32_t philox_next32(philox_state *state) { + uint64_t next; + + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = philox_next(state); + + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +extern void philox_jump(philox_state *state); + +extern void philox_advance(uint64_t *step, philox_state *state); + +#endif diff --git a/numpy/random/src/splitmix64/LICENSE.md b/numpy/random/src/splitmix64/LICENSE.md new file mode 100644 index 000000000..3c4d73b92 --- /dev/null +++ b/numpy/random/src/splitmix64/LICENSE.md @@ -0,0 +1,9 @@ +# SPLITMIX64 + +Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/splitmix64/splitmix64.c b/numpy/random/src/splitmix64/splitmix64.c new file mode 100644 index 000000000..79a845982 --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.c @@ -0,0 +1,29 @@ +/* Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. + +Modified 2018 by Kevin Sheppard. Modifications licensed under the NCSA +license. +*/ + +/* This is a fixed-increment version of Java 8's SplittableRandom generator + See http://dx.doi.org/10.1145/2714064.2660195 and + http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html + + It is a very fast generator passing BigCrush, and it can be useful if + for some reason you absolutely want 64 bits of state; otherwise, we + rather suggest to use a xoroshiro128+ (for moderately parallel + computations) or xorshift1024* (for massively parallel computations) + generator. */ + +#include "splitmix64.h" + +extern inline uint64_t splitmix64_next(uint64_t *state); + +extern inline uint64_t splitmix64_next64(splitmix64_state *state); + +extern inline uint32_t splitmix64_next32(splitmix64_state *state); diff --git a/numpy/random/src/splitmix64/splitmix64.h b/numpy/random/src/splitmix64/splitmix64.h new file mode 100644 index 000000000..880132970 --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.h @@ -0,0 +1,39 @@ +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define inline __forceinline +#else +#include <inttypes.h> +#endif +#else +#include <inttypes.h> +#endif + +typedef struct s_splitmix64_state { + uint64_t state; + int has_uint32; + uint32_t uinteger; +} splitmix64_state; + +static inline uint64_t splitmix64_next(uint64_t *state) { + uint64_t z = (state[0] += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} + +static inline uint64_t splitmix64_next64(splitmix64_state *state) { + return splitmix64_next(&state->state); +} + +static inline uint32_t splitmix64_next32(splitmix64_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = splitmix64_next64(state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} diff --git a/numpy/random/src/splitmix64/splitmix64.orig.c b/numpy/random/src/splitmix64/splitmix64.orig.c new file mode 100644 index 000000000..df6133aab --- /dev/null +++ b/numpy/random/src/splitmix64/splitmix64.orig.c @@ -0,0 +1,28 @@ +/* Written in 2015 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> + +/* This is a fixed-increment version of Java 8's SplittableRandom generator + See http://dx.doi.org/10.1145/2714064.2660195 and + http://docs.oracle.com/javase/8/docs/api/java/util/SplittableRandom.html + + It is a very fast generator passing BigCrush, and it can be useful if + for some reason you absolutely want 64 bits of state; otherwise, we + rather suggest to use a xoroshiro128+ (for moderately parallel + computations) or xorshift1024* (for massively parallel computations) + generator. */ + +uint64_t x; /* The state can be seeded with any value. */ + +uint64_t next() { + uint64_t z = (x += 0x9e3779b97f4a7c15); + z = (z ^ (z >> 30)) * 0xbf58476d1ce4e5b9; + z = (z ^ (z >> 27)) * 0x94d049bb133111eb; + return z ^ (z >> 31); +} diff --git a/numpy/random/src/threefry/LICENSE.md b/numpy/random/src/threefry/LICENSE.md new file mode 100644 index 000000000..4a9f6bb29 --- /dev/null +++ b/numpy/random/src/threefry/LICENSE.md @@ -0,0 +1,31 @@ +# THREEFRY + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/threefry/threefry-benchmark.c b/numpy/random/src/threefry/threefry-benchmark.c new file mode 100644 index 000000000..6d6239cd3 --- /dev/null +++ b/numpy/random/src/threefry/threefry-benchmark.c @@ -0,0 +1,38 @@ +/* + * Simple benchamrk command + * + * cl threefry-benchmark.c /Ox + * + * gcc threefry-benchmark.c -O3 -o threefry-benchmark + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + */ +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { + threefry4x64_key_t ctr = {{0, 0, 0, 0}}; + threefry4x64_ctr_t key = {{0xDEADBEAF, 0, 0, 0}}; + threefry4x64_ctr_t out; + uint64_t count = 0, sum = 0; + int i, j; + clock_t begin = clock(); + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + sum += out.v[j]; + count++; + } + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/threefry/threefry-orig.c b/numpy/random/src/threefry/threefry-orig.c new file mode 100644 index 000000000..d27cfd797 --- /dev/null +++ b/numpy/random/src/threefry/threefry-orig.c @@ -0,0 +1,83 @@ +/* +Copyright (c) 2017, Pierre de Buyl + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +1. Redistributions of source code must retain the above copyright +notice, this list of conditions and the following disclaimer. + +2. Redistributions in binary form must reproduce the above copyright +notice, this list of conditions and the following disclaimer in the +documentation and/or other materials provided with the distribution. + +3. Neither the name of the copyright holder nor the names of its +contributors may be used to endorse or promote products derived from +this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#include "threefry.h" +#include <stdint.h> + +#define N_WORDS 2 +#define KEY_LENGTH 3 +#define C240 0x1BD11BDAA9FC1A22 +#define N_ROUNDS 20 +#define MASK 0xffffffffffffffff +#define DOUBLE_MULT 5.421010862427522e-20 + +static const int ROTATION[] = {16, 42, 12, 31, 16, 32, 24, 21}; + +uint64_t rotl_64(uint64_t x, int d) { return ((x << d) | (x >> (64 - d))); } + +threefry_t mix(threefry_t x, int R) { + x.c0 += x.c1; + x.c1 = rotl_64(x.c1, R) ^ x.c0; + return x; +} + +threefry_t threefry(threefry_t p, threefry_t k) { + uint64_t K[] = {k.c0, k.c1, C240 ^ k.c0 ^ k.c1}; + int rmod4, rdiv4; + threefry_t x; + x = p; + for (int r = 0; r < N_ROUNDS; r++) { + rmod4 = r % 4; + if (rmod4 == 0) { + rdiv4 = r / 4; + x.c0 += K[rdiv4 % KEY_LENGTH]; + x.c1 += K[(rdiv4 + 1) % KEY_LENGTH] + rdiv4; + } + x = mix(x, ROTATION[r % 8]); + } + x.c0 += K[(N_ROUNDS / 4) % KEY_LENGTH]; + x.c1 += K[(N_ROUNDS / 4 + 1) % KEY_LENGTH] + N_ROUNDS / 4; + return x; +} + +uint64_t threefry_uint64(threefry_t *c, threefry_t *k) { + threefry_t x; + x = threefry(*c, *k); + c->c0++; + return x.c0; +} + +double threefry_double(threefry_t *c, threefry_t *k) { + threefry_t x; + x = threefry(*c, *k); + c->c0++; + return x.c0 * DOUBLE_MULT; +} diff --git a/numpy/random/src/threefry/threefry-test-data-gen.c b/numpy/random/src/threefry/threefry-test-data-gen.c new file mode 100644 index 000000000..328eb2575 --- /dev/null +++ b/numpy/random/src/threefry/threefry-test-data-gen.c @@ -0,0 +1,83 @@ +/* + * Generate testing csv files + * + * cl threefry-test-data-gen.c /Ox ../splitmix64/splitmix64.c /Ox + * threefry-test-data-gen.exe + * + * gcc threefry-test-data-gen.c ../splitmix64/splitmix64.c /Ox -o + * threefry-test-data-gen + * ./threefry-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + threefry4x64_key_t ctr = {{0, 0, 0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + threefry4x64_ctr_t key = {{0}}; + threefry4x64_ctr_t out; + uint64_t store[N]; + int i, j; + for (i = 0; i < 4; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("threefry-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; + state = seed = 0; + for (i = 0; i < 4; i++) { + key.v[i] = splitmix64_next(&state); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x64_R(threefry4x64_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("threefry-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/threefry/threefry.c b/numpy/random/src/threefry/threefry.c new file mode 100644 index 000000000..19c37df1b --- /dev/null +++ b/numpy/random/src/threefry/threefry.c @@ -0,0 +1,29 @@ +#include "threefry.h" + +extern INLINE uint64_t threefry_next64(threefry_state *state); + +extern INLINE uint32_t threefry_next32(threefry_state *state); + +extern void threefry_jump(threefry_state *state) { + /* Advances state as-if 2^128 draws were made */ + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } +} + +extern void threefry_advance(uint64_t *step, threefry_state *state) { + int i, carry = 0; + uint64_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { + carry = 1; + } + } +} diff --git a/numpy/random/src/threefry/threefry.h b/numpy/random/src/threefry/threefry.h new file mode 100644 index 000000000..297c1241a --- /dev/null +++ b/numpy/random/src/threefry/threefry.h @@ -0,0 +1,341 @@ +/* +Adapted from random123's threefry.h +*/ +#ifndef _RANDOMDGEN__THREEFRY_H_ +#define _RANDOMDGEN__THREEFRY_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +#define THREEFRY_BUFFER_SIZE 4L + +enum r123_enum_threefry64x4 { + /* These are the R_256 constants from the Threefish reference sources + with names changed to R_64x4... */ + R_64x4_0_0 = 14, + R_64x4_0_1 = 16, + R_64x4_1_0 = 52, + R_64x4_1_1 = 57, + R_64x4_2_0 = 23, + R_64x4_2_1 = 40, + R_64x4_3_0 = 5, + R_64x4_3_1 = 37, + R_64x4_4_0 = 25, + R_64x4_4_1 = 33, + R_64x4_5_0 = 46, + R_64x4_5_1 = 12, + R_64x4_6_0 = 58, + R_64x4_6_1 = 22, + R_64x4_7_0 = 32, + R_64x4_7_1 = 32 +}; + +struct r123array4x64 { + uint64_t v[4]; +}; /* r123array4x64 */ + +typedef struct r123array4x64 threefry4x64_key_t; +typedef struct r123array4x64 threefry4x64_ctr_t; + +static INLINE uint64_t RotL_64(uint64_t x, unsigned int N); +static INLINE uint64_t RotL_64(uint64_t x, unsigned int N) { + return (x << (N & 63)) | (x >> ((64 - N) & 63)); +} + +static INLINE threefry4x64_ctr_t threefry4x64_R(unsigned int Nrounds, + threefry4x64_ctr_t in, + threefry4x64_key_t k); +static INLINE threefry4x64_ctr_t threefry4x64_R(unsigned int Nrounds, + threefry4x64_ctr_t in, + threefry4x64_key_t k) { + threefry4x64_ctr_t X; + uint64_t ks[4 + 1]; + int i; + ks[4] = ((0xA9FC1A22) + (((uint64_t)(0x1BD11BDA)) << 32)); + for (i = 0; i < 4; i++) { + ks[i] = k.v[i]; + X.v[i] = in.v[i]; + ks[4] ^= k.v[i]; + } + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + if (Nrounds > 0) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 1) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 2) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 1; + } + if (Nrounds > 4) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 5) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 6) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 2; + } + if (Nrounds > 8) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 9) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 10) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 3; + } + if (Nrounds > 12) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 13) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 14) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 4; + } + if (Nrounds > 16) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 17) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 18) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 5; + } + /* Maximum of 20 rounds */ + if (Nrounds > 20) { + X.v[0] += X.v[1]; + X.v[1] = RotL_64(X.v[1], R_64x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_64(X.v[3], R_64x4_4_1); + X.v[3] ^= X.v[2]; + } + return X; +} +enum r123_enum_threefry4x64 { threefry4x64_rounds = 20 }; + +typedef struct s_threefry_state { + threefry4x64_key_t *ctr; + threefry4x64_ctr_t *key; + int buffer_pos; + uint64_t buffer[THREEFRY_BUFFER_SIZE]; + int has_uint32; + uint32_t uinteger; +} threefry_state; + +static INLINE uint64_t threefry_next(threefry_state *state) { + int i; + threefry4x64_ctr_t ct; + uint64_t out; + if (state->buffer_pos < THREEFRY_BUFFER_SIZE) { + out = state->buffer[state->buffer_pos]; + state->buffer_pos++; + return out; + } + /* generate 4 new uint64_t */ + state->ctr->v[0]++; + /* Handle carry */ + if (state->ctr->v[0] == 0) { + state->ctr->v[1]++; + if (state->ctr->v[1] == 0) { + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } + } + } + ct = threefry4x64_R(threefry4x64_rounds, *state->ctr, *state->key); + for (i = 0; i < 4; i++) { + state->buffer[i] = ct.v[i]; + } + state->buffer_pos = 1; + return state->buffer[0]; +} + +static INLINE uint64_t threefry_next64(threefry_state *state) { + return threefry_next(state); +} + +static INLINE uint32_t threefry_next32(threefry_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = threefry_next(state); + + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +extern void threefry_jump(threefry_state *state); + +extern void threefry_advance(uint64_t *step, threefry_state *state); + +#endif diff --git a/numpy/random/src/threefry32/LICENSE.md b/numpy/random/src/threefry32/LICENSE.md new file mode 100644 index 000000000..591cd75f4 --- /dev/null +++ b/numpy/random/src/threefry32/LICENSE.md @@ -0,0 +1,31 @@ +# THREEFRY32 + +Copyright 2010-2012, D. E. Shaw Research. +All rights reserved. + +Redistribution and use in source and binary forms, with or without +modification, are permitted provided that the following conditions are +met: + +* Redistributions of source code must retain the above copyright + notice, this list of conditions, and the following disclaimer. + +* Redistributions in binary form must reproduce the above copyright + notice, this list of conditions, and the following disclaimer in the + documentation and/or other materials provided with the distribution. + +* Neither the name of D. E. Shaw Research nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + +THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. diff --git a/numpy/random/src/threefry32/threefry32-test-data-gen.c b/numpy/random/src/threefry32/threefry32-test-data-gen.c new file mode 100644 index 000000000..0e6229995 --- /dev/null +++ b/numpy/random/src/threefry32/threefry32-test-data-gen.c @@ -0,0 +1,88 @@ +/* + * Generate testing csv files + * + * cl threefry32-test-data-gen.c /Ox ../splitmix64/splitmix64.c /Ox + * threefry32-test-data-gen.exe + * + * gcc threefry32-test-data-gen.c ../splitmix64/splitmix64.c /Ox -o + * threefry32-test-data-gen + * ./threefry32-test-data-gen + * + * Requires the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "Random123/threefry.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + threefry4x32_key_t ctr = {{0, 0, 0, 0}}; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + threefry4x32_ctr_t key = {{0}}; + threefry4x32_ctr_t out; + uint64_t store[N]; + uint64_t seed_val; + int i, j; + for (i = 0; i < 4; i++) { + seed_val = splitmix64_next(&state); + key.v[2*i] = (uint32_t)seed_val; + key.v[2*i+1] = (uint32_t)(seed_val >> 32); + } + for (i = 0; i < N / 4UL; i++) { + ctr.v[0]++; + out = threefry4x32_R(threefry4x32_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + FILE *fp; + fp = fopen("threefry32-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + ctr.v[0] = 0; + state = seed = 0; + for (i = 0; i < 4; i++) { + seed_val = splitmix64_next(&state); + key.v[2*i] = (uint32_t)seed_val; + key.v[2*i+1] = (uint32_t)(seed_val >> 32); + } + for (i = 0; i < N / 4; i++) { + ctr.v[0]++; + out = threefry4x32_R(threefry4x32_rounds, ctr, key); + for (j = 0; j < 4; j++) { + store[i * 4 + j] = out.v[j]; + } + } + + fp = fopen("threefry32-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/threefry32/threefry32.c b/numpy/random/src/threefry32/threefry32.c new file mode 100644 index 000000000..500e9482d --- /dev/null +++ b/numpy/random/src/threefry32/threefry32.c @@ -0,0 +1,29 @@ +#include "threefry32.h" + +extern INLINE uint64_t threefry32_next64(threefry32_state *state); + +extern INLINE uint32_t threefry32_next32(threefry32_state *state); + +extern void threefry32_jump(threefry32_state *state) { + /* Advances state as-if 2^64 draws were made */ + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } +} + +extern void threefry32_advance(uint32_t *step, threefry32_state *state) { + int i, carry = 0; + uint32_t v_orig; + for (i = 0; i < 4; i++) { + if (carry == 1) { + state->ctr->v[i]++; + carry = state->ctr->v[i] == 0 ? 1 : 0; + } + v_orig = state->ctr->v[i]; + state->ctr->v[i] += step[i]; + if (state->ctr->v[i] < v_orig && carry == 0) { + carry = 1; + } + } +} diff --git a/numpy/random/src/threefry32/threefry32.h b/numpy/random/src/threefry32/threefry32.h new file mode 100644 index 000000000..74a85c42b --- /dev/null +++ b/numpy/random/src/threefry32/threefry32.h @@ -0,0 +1,842 @@ +/* +Adapted from random123's threefry.h +*/ +#ifndef _RANDOMDGEN__THREEFRY32_H_ +#define _RANDOMDGEN__THREEFRY32_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +#define THREEFRY_BUFFER_SIZE 4L + +static INLINE uint32_t RotL_32(uint32_t x, unsigned int N); +static INLINE uint32_t RotL_32(uint32_t x, unsigned int N) { + return (x << (N & 31)) | (x >> ((32 - N) & 31)); +} + +struct r123array4x32 { + uint32_t v[4]; +}; + +enum r123_enum_threefry32x4 { + + R_32x4_0_0 = 10, + R_32x4_0_1 = 26, + R_32x4_1_0 = 11, + R_32x4_1_1 = 21, + R_32x4_2_0 = 13, + R_32x4_2_1 = 27, + R_32x4_3_0 = 23, + R_32x4_3_1 = 5, + R_32x4_4_0 = 6, + R_32x4_4_1 = 20, + R_32x4_5_0 = 17, + R_32x4_5_1 = 11, + R_32x4_6_0 = 25, + R_32x4_6_1 = 10, + R_32x4_7_0 = 18, + R_32x4_7_1 = 20 + +}; + +typedef struct r123array4x32 threefry4x32_ctr_t; +typedef struct r123array4x32 threefry4x32_key_t; +typedef struct r123array4x32 threefry4x32_ukey_t; +static INLINE threefry4x32_key_t threefry4x32keyinit(threefry4x32_ukey_t uk) { + return uk; +}; +static INLINE threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, + threefry4x32_ctr_t in, + threefry4x32_key_t k); +static INLINE threefry4x32_ctr_t threefry4x32_R(unsigned int Nrounds, + threefry4x32_ctr_t in, + threefry4x32_key_t k) { + threefry4x32_ctr_t X; + uint32_t ks[4 + 1]; + int i; + ks[4] = 0x1BD11BDA; + for (i = 0; i < 4; i++) { + ks[i] = k.v[i]; + X.v[i] = in.v[i]; + ks[4] ^= k.v[i]; + } + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + if (Nrounds > 0) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 1) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 2) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 3) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 1; + } + if (Nrounds > 4) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 5) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 6) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 7) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 2; + } + if (Nrounds > 8) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 9) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 10) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 11) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 3; + } + if (Nrounds > 12) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 13) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 14) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 15) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 4; + } + if (Nrounds > 16) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 17) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 18) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 19) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 5; + } + if (Nrounds > 20) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 21) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 22) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 23) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 23) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 6; + } + if (Nrounds > 24) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 25) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 26) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 27) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 27) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 7; + } + if (Nrounds > 28) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 29) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 30) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 31) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 31) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 8; + } + if (Nrounds > 32) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 33) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 34) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 35) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 35) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 9; + } + if (Nrounds > 36) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 37) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 38) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 39) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 39) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 10; + } + if (Nrounds > 40) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 41) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 42) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 43) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 43) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 11; + } + if (Nrounds > 44) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 45) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 46) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 47) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 47) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 12; + } + if (Nrounds > 48) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 49) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 50) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 51) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 51) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 13; + } + if (Nrounds > 52) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 53) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 54) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 55) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 55) { + X.v[0] += ks[4]; + X.v[1] += ks[0]; + X.v[2] += ks[1]; + X.v[3] += ks[2]; + X.v[4 - 1] += 14; + } + if (Nrounds > 56) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 57) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 58) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 59) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 59) { + X.v[0] += ks[0]; + X.v[1] += ks[1]; + X.v[2] += ks[2]; + X.v[3] += ks[3]; + X.v[4 - 1] += 15; + } + if (Nrounds > 60) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 61) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 62) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 63) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 63) { + X.v[0] += ks[1]; + X.v[1] += ks[2]; + X.v[2] += ks[3]; + X.v[3] += ks[4]; + X.v[4 - 1] += 16; + } + if (Nrounds > 64) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_0_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_0_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 65) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_1_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_1_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 66) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_2_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_2_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 67) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_3_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_3_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 67) { + X.v[0] += ks[2]; + X.v[1] += ks[3]; + X.v[2] += ks[4]; + X.v[3] += ks[0]; + X.v[4 - 1] += 17; + } + if (Nrounds > 68) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_4_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_4_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 69) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_5_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_5_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 70) { + X.v[0] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_6_0); + X.v[1] ^= X.v[0]; + X.v[2] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_6_1); + X.v[3] ^= X.v[2]; + } + if (Nrounds > 71) { + X.v[0] += X.v[3]; + X.v[3] = RotL_32(X.v[3], R_32x4_7_0); + X.v[3] ^= X.v[0]; + X.v[2] += X.v[1]; + X.v[1] = RotL_32(X.v[1], R_32x4_7_1); + X.v[1] ^= X.v[2]; + } + if (Nrounds > 71) { + X.v[0] += ks[3]; + X.v[1] += ks[4]; + X.v[2] += ks[0]; + X.v[3] += ks[1]; + X.v[4 - 1] += 18; + } + return X; +} +enum r123_enum_threefry4x32 { threefry4x32_rounds = 20 }; +static INLINE threefry4x32_ctr_t threefry4x32(threefry4x32_ctr_t in, + threefry4x32_key_t k); +static INLINE threefry4x32_ctr_t threefry4x32(threefry4x32_ctr_t in, + threefry4x32_key_t k) { + return threefry4x32_R(threefry4x32_rounds, in, k); +} + +typedef struct s_threefry32_state { + threefry4x32_key_t *ctr; + threefry4x32_ctr_t *key; + int buffer_pos; + uint32_t buffer[THREEFRY_BUFFER_SIZE]; +} threefry32_state; + +static INLINE uint32_t threefry32_next(threefry32_state *state) { + int i; + threefry4x32_ctr_t ct; + uint32_t out; + if (state->buffer_pos < THREEFRY_BUFFER_SIZE) { + out = state->buffer[state->buffer_pos]; + state->buffer_pos++; + return out; + } + /* generate 4 new uint64_t */ + state->ctr->v[0]++; + /* Handle carry */ + if (state->ctr->v[0] == 0) { + state->ctr->v[1]++; + if (state->ctr->v[1] == 0) { + state->ctr->v[2]++; + if (state->ctr->v[2] == 0) { + state->ctr->v[3]++; + } + } + } + ct = threefry4x32_R(threefry4x32_rounds, *state->ctr, *state->key); + for (i = 0; i < 4; i++) { + state->buffer[i] = ct.v[i]; + } + state->buffer_pos = 1; + return state->buffer[0]; +} + +static INLINE uint64_t threefry32_next64(threefry32_state *state) { + return ((uint64_t)threefry32_next(state) << 32) | threefry32_next(state); +} + +static INLINE uint32_t threefry32_next32(threefry32_state *state) { + return threefry32_next(state); +} + +static INLINE double threefry32_next_double(threefry32_state *state) { + int32_t a = threefry32_next(state) >> 5, b = threefry32_next(state) >> 6; + return (a * 67108864.0 + b) / 9007199254740992.0; +} + +extern void threefry32_jump(threefry32_state *state); + +extern void threefry32_advance(uint32_t *step, threefry32_state *state); + +#endif diff --git a/numpy/random/src/xoroshiro128/LICENSE.md b/numpy/random/src/xoroshiro128/LICENSE.md new file mode 100644 index 000000000..969430149 --- /dev/null +++ b/numpy/random/src/xoroshiro128/LICENSE.md @@ -0,0 +1,9 @@ +# XOROSHIRO128 + +Written in 2016 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c b/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c new file mode 100644 index 000000000..9a7b52bfb --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128-benchmark.c @@ -0,0 +1,35 @@ +/* + * cl xoroshiro128-benchmark.c xoroshiro128plus.orig.c \ + * ../splitmix64/splitmix64.c /Ox + * + * gcc -O3 xoroshiro128-benchmark.c xoroshiro128plus.orig.c \ + * ../splitmix64/splitmix64.c -o xoroshiro128-benchmark + * + */ +#include "../splitmix64/splitmix64.h" +#include "xoroshiro128plus.orig.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() +{ + uint64_t count = 0, sum = 0; + uint64_t seed = 0xDEADBEAF; + s[0] = splitmix64_next(&seed); + s[1] = splitmix64_next(&seed); + int i; + clock_t begin = clock(); + for (i = 0; i < N; i++) + { + sum += next(); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c b/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c new file mode 100644 index 000000000..d50e63f5e --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128-test-data-gen.c @@ -0,0 +1,83 @@ +/* + * Generate testing csv files + * + * cl xoroshiro128-test-data-gen.c xoroshiro128plus.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xoroshiro128-test-data-gen.exe * + * + * gcc xoroshiro128-test-data-gen.c xoroshiro128plus.orig.c / + * ../splitmix64/splitmix64.c -o xoroshiro128-test-data-gen + * ./xoroshiro128-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoroshiro128plus.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() +{ + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 2; i++) + { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) + { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoroshiro128-testset-1.csv", "w"); + if (fp == NULL) + { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) + { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) + { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 2; i++) + { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) + { + store[i] = next(); + } + fp = fopen("xoroshiro128-testset-2.csv", "w"); + if (fp == NULL) + { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) + { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) + { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128.c b/numpy/random/src/xoroshiro128/xoroshiro128.c new file mode 100644 index 000000000..060eb8a51 --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128.c @@ -0,0 +1,60 @@ +/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +/* This is xoroshiro128+ 1.0, our best and fastest small-state generator + for floating-point numbers. We suggest to use its upper bits for + floating-point generation, as it is slightly faster than + xoroshiro128**. It passes all tests we are aware of except for the four + lower bits, which might fail linearity tests (and just those), so if + low linear complexity is not considered an issue (as it is usually the + case) it can be used to generate 64-bit outputs, too; moreover, this + generator has a very mild Hamming-weight dependency making our test + (http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe + this slight bias cannot affect any application. If you are concerned, + use xoroshiro128** or xoshiro256+. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. + + NOTE: the parameters (a=24, b=16, b=37) of this version give slightly + better results in our test than the 2016 version (a=55, b=14, c=36). +*/ + +#include "xoroshiro128.h" + +extern INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state); + +extern INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state); + +void xoroshiro128_jump(xoroshiro128_state *state) +{ + int i, b; + uint64_t s0; + uint64_t s1; + static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc}; + + s0 = 0; + s1 = 0; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= state->s[0]; + s1 ^= state->s[1]; + } + xoroshiro128_next(&state->s[0]); + } + + state->s[0] = s0; + state->s[1] = s1; +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128.h b/numpy/random/src/xoroshiro128/xoroshiro128.h new file mode 100644 index 000000000..0db82b173 --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128.h @@ -0,0 +1,63 @@ +#ifndef _RANDOMDGEN__XOROSHIRO128_H_ +#define _RANDOMDGEN__XOROSHIRO128_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xoroshiro128_state +{ + uint64_t s[2]; + int has_uint32; + uint32_t uinteger; +} xoroshiro128_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoroshiro128_next(uint64_t *s) +{ + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + s[1] = rotl(s1, 37); // c + + return result; +} + +static INLINE uint64_t xoroshiro128_next64(xoroshiro128_state *state) +{ + return xoroshiro128_next(&state->s[0]); +} + +static INLINE uint32_t xoroshiro128_next32(xoroshiro128_state *state) +{ + uint64_t next; + if (state->has_uint32) + { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoroshiro128_next(&state->s[0]); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoroshiro128_jump(xoroshiro128_state *state); + +#endif diff --git a/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c new file mode 100644 index 000000000..1b5f46e4b --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.c @@ -0,0 +1,102 @@ +/* Written in 2016-2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> + +/* This is xoroshiro128+ 1.0, our best and fastest small-state generator + for floating-point numbers. We suggest to use its upper bits for + floating-point generation, as it is slightly faster than + xoroshiro128**. It passes all tests we are aware of except for the four + lower bits, which might fail linearity tests (and just those), so if + low linear complexity is not considered an issue (as it is usually the + case) it can be used to generate 64-bit outputs, too; moreover, this + generator has a very mild Hamming-weight dependency making our test + (http://prng.di.unimi.it/hwd.php) fail after 5 TB of output; we believe + this slight bias cannot affect any application. If you are concerned, + use xoroshiro128** or xoshiro256+. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. + + NOTE: the parameters (a=24, b=16, b=37) of this version give slightly + better results in our test than the 2016 version (a=55, b=14, c=36). +*/ + +uint64_t s[2]; + +static inline uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +uint64_t next(void) +{ + const uint64_t s0 = s[0]; + uint64_t s1 = s[1]; + const uint64_t result = s0 + s1; + + s1 ^= s0; + s[0] = rotl(s0, 24) ^ s1 ^ (s1 << 16); // a, b + s[1] = rotl(s1, 37); // c + + return result; +} + +/* This is the jump function for the generator. It is equivalent + to 2^64 calls to next(); it can be used to generate 2^64 + non-overlapping subsequences for parallel computations. */ + +void jump(void) +{ + static const uint64_t JUMP[] = {0xdf900294d8f554a5, 0x170865df4b3201fc}; + + uint64_t s0 = 0; + uint64_t s1 = 0; + for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (int b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(); + } + s[0] = s0; + s[1] = s1; +} + +/* This is the long-jump function for the generator. It is equivalent to + 2^96 calls to next(); it can be used to generate 2^32 starting points, + from each of which jump() will generate 2^32 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) +{ + static const uint64_t LONG_JUMP[] = {0xd2a98b26625eee7b, 0xdddf9b1090aa7ac1}; + + uint64_t s0 = 0; + uint64_t s1 = 0; + for (int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for (int b = 0; b < 64; b++) + { + if (LONG_JUMP[i] & UINT64_C(1) << b) + { + s0 ^= s[0]; + s1 ^= s[1]; + } + next(); + } + + s[0] = s0; + s[1] = s1; +} diff --git a/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h new file mode 100644 index 000000000..20c96fe04 --- /dev/null +++ b/numpy/random/src/xoroshiro128/xoroshiro128plus.orig.h @@ -0,0 +1,5 @@ +#include <stdint.h> + +uint64_t s[2]; +uint64_t next(void); +void jump(void); diff --git a/numpy/random/src/xorshift1024/LICENSE.md b/numpy/random/src/xorshift1024/LICENSE.md new file mode 100644 index 000000000..3ca8ed4b9 --- /dev/null +++ b/numpy/random/src/xorshift1024/LICENSE.md @@ -0,0 +1,9 @@ +# XORSHIFT1024 + +Written in 2017 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/xorshift1024/xorshift1024-benchmark.c b/numpy/random/src/xorshift1024/xorshift1024-benchmark.c new file mode 100644 index 000000000..0eef33537 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024-benchmark.c @@ -0,0 +1,35 @@ +/* + * cl xorshift1024-benchmark.c xorshift2014.orig.c + * ../splitmix64/splitmix64.c /Ox + * + * gcc -O3 xorshift1024-benchmark.c xorshift2014.orig.c / + * ../splitmix64/splitmix64.c -o xorshift1024-benchmark + * + */ +#include "../splitmix64/splitmix64.h" +#include "xorshift1024.orig.h" +#include <inttypes.h> +#include <stdio.h> +#include <time.h> + +#define N 1000000000 + +int main() { + uint64_t count = 0, sum = 0; + uint64_t seed = 0xDEADBEAF; + int i; + for (i = 0; i < 16; i++) { + s[i] = splitmix64_next(&seed); + } + p = 0; + clock_t begin = clock(); + for (i = 0; i < N; i++) { + sum += next(); + count++; + } + clock_t end = clock(); + double time_spent = (double)(end - begin) / CLOCKS_PER_SEC; + printf("0x%" PRIx64 "\ncount: %" PRIu64 "\n", sum, count); + printf("%" PRIu64 " randoms per second\n", + (uint64_t)(N / time_spent) / 1000000 * 1000000); +} diff --git a/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c b/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c new file mode 100644 index 000000000..a2ae08df4 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024-test-data-gen.c @@ -0,0 +1,74 @@ +/* + * Generate testing csv files + * + * cl xorshift1024-test-data-gen.c xorshift1024.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xorshift1024-test-data-gen.exe * + * + * gcc xorshift1024-test-data-gen.c xorshift1024.orig.c / + * ../splitmix64/splitmix64.c -o xorshift1024-test-data-gen + * ./xorshift1024-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "xorshift1024.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 16; i++) { + s[i] = splitmix64_next(&state); + } + p = 0; + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xorshift1024-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 16; i++) { + s[i] = splitmix64_next(&state); + } + p = 0; + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xorshift1024-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xorshift1024/xorshift1024.c b/numpy/random/src/xorshift1024/xorshift1024.c new file mode 100644 index 000000000..8737b5a82 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.c @@ -0,0 +1,32 @@ +#include "xorshift1024.h" + +/* This is the jump function for the generator. It is equivalent + to 2^512 calls to next(); it can be used to generate 2^512 + non-overlapping subsequences for parallel computations. */ + +extern INLINE uint64_t xorshift1024_next(xorshift1024_state *state); +extern INLINE uint64_t xorshift1024_next64(xorshift1024_state *state); +extern INLINE uint32_t xorshift1024_next32(xorshift1024_state *state); + +void xorshift1024_jump(xorshift1024_state *state) { + int i, j, b; + static const uint64_t JUMP[] = { + 0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837, + 0x4489affce4f31a1e, 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022, + 0x3659132bb12fea70, 0xaac17d8efa43cab8, 0xc4cb815590989b13, + 0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5, + 0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca, + 0x284600e3f30e38c3}; + + uint64_t t[16] = {0}; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for (j = 0; j < 16; j++) + t[j] ^= state->s[(j + state->p) & 15]; + xorshift1024_next(state); + } + + for (j = 0; j < 16; j++) + state->s[(j + state->p) & 15] = t[j]; +} diff --git a/numpy/random/src/xorshift1024/xorshift1024.h b/numpy/random/src/xorshift1024/xorshift1024.h new file mode 100644 index 000000000..e0ef77826 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.h @@ -0,0 +1,50 @@ +#ifndef _RANDOMDGEN__XORSHIFT1024_H_ +#define _RANDOMDGEN__XORSHIFT1024_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xorshift1024_state { + uint64_t s[16]; + int p; + int has_uint32; + uint32_t uinteger; +} xorshift1024_state; + +static INLINE uint64_t xorshift1024_next(xorshift1024_state *state) { + const uint64_t s0 = state->s[state->p]; + uint64_t s1 = state->s[state->p = ((state->p) + 1) & 15]; + s1 ^= s1 << 31; // a + state->s[state->p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); // b,c + return state->s[state->p] * 0x9e3779b97f4a7c13; +} + +static INLINE uint64_t xorshift1024_next64(xorshift1024_state *state) { + return xorshift1024_next(state); +} + +static INLINE uint32_t xorshift1024_next32(xorshift1024_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = xorshift1024_next(state); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xorshift1024_jump(xorshift1024_state *state); + +#endif diff --git a/numpy/random/src/xorshift1024/xorshift1024.orig.c b/numpy/random/src/xorshift1024/xorshift1024.orig.c new file mode 100644 index 000000000..03c1c17fe --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.orig.c @@ -0,0 +1,68 @@ +/* Written in 2017 by Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> +#include <string.h> + +/* NOTE: as of 2017-10-08, this generator has a different multiplier (a + fixed-point representation of the golden ratio), which eliminates + linear dependencies from one of the lowest bits. The previous + multiplier was 1181783497276652981 (M_8 in the paper). If you need to + tell apart the two generators, you can refer to this generator as + xorshift1024φ and to the previous one as xorshift1024*M_8. + + This is a fast, high-quality generator. If 1024 bits of state are too + much, try a xoroshiro128+ generator. + + Note that the two lowest bits of this generator are LFSRs of degree + 1024, and thus will fail binary rank tests. The other bits needs a much + higher degree to be represented as LFSRs. + + We suggest to use a sign test to extract a random Boolean value, and + right shifts to extract subsets of bits. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +uint64_t s[16]; +int p; + +uint64_t next(void) { + const uint64_t s0 = s[p]; + uint64_t s1 = s[p = (p + 1) & 15]; + s1 ^= s1 << 31; // a + s[p] = s1 ^ s0 ^ (s1 >> 11) ^ (s0 >> 30); // b,c + return s[p] * 0x9e3779b97f4a7c13; +} + +/* This is the jump function for the generator. It is equivalent + to 2^512 calls to next(); it can be used to generate 2^512 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { + 0x84242f96eca9c41d, 0xa3c65b8776f96855, 0x5b34a39f070b5837, + 0x4489affce4f31a1e, 0x2ffeeb0a48316f40, 0xdc2d9891fe68c022, + 0x3659132bb12fea70, 0xaac17d8efa43cab8, 0xc4cb815590989b13, + 0x5ee975283d71c93b, 0x691548c86c1bd540, 0x7910c41d10a1e6a5, + 0x0b5fc64563b3e2a8, 0x047f7684e9fc949d, 0xb99181f2d8f685ca, + 0x284600e3f30e38c3}; + + uint64_t t[16] = {0}; + for (int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for (int j = 0; j < 16; j++) + t[j] ^= s[(j + p) & 15]; + next(); + } + + for (int j = 0; j < 16; j++) + s[(j + p) & 15] = t[j]; +} diff --git a/numpy/random/src/xorshift1024/xorshift1024.orig.h b/numpy/random/src/xorshift1024/xorshift1024.orig.h new file mode 100644 index 000000000..9b7597967 --- /dev/null +++ b/numpy/random/src/xorshift1024/xorshift1024.orig.h @@ -0,0 +1,7 @@ +#include <stdint.h> +#include <string.h> + +uint64_t s[16]; +int p; +uint64_t next(void); +void jump(void); diff --git a/numpy/random/src/xoshiro256starstar/LICENSE.md b/numpy/random/src/xoshiro256starstar/LICENSE.md new file mode 100644 index 000000000..d863f3b29 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/LICENSE.md @@ -0,0 +1,9 @@ +# XOSHIRO256STARSTAR + +Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>.
\ No newline at end of file diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c new file mode 100644 index 000000000..8522229dd --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar-test-data-gen.c @@ -0,0 +1,72 @@ +/* + * Generate testing csv files + * + * cl xoshiro256starstar-test-data-gen.c xoshiro256starstar.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xoshiro256starstar-test-data-gen.exe * + * + * gcc xoshiro256starstar-test-data-gen.c xoshiro256starstar.orig.c / + * ../splitmix64/splitmix64.c -o xoshiro256starstar-test-data-gen + * ./xoshiro256starstar-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoshiro256starstar.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 4; i++) { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoshiro256starstar-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 4; i++) { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xoshiro256starstar-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c new file mode 100644 index 000000000..30b6c7d85 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.c @@ -0,0 +1,55 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include "xoshiro256starstar.h" + +/* This is xoshiro256** 1.0, our all-purpose, rock-solid generator. It has + excellent (sub-ns) speed, a state (256 bits) that is large enough for + any parallel application, and it passes all tests we are aware of. + + For generating just floating-point numbers, xoshiro256+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +extern INLINE uint64_t xoshiro256starstar_next64(xoshiro256starstar_state *state); + +extern INLINE uint32_t xoshiro256starstar_next32(xoshiro256starstar_state *state); + +/* This is the jump function for the generator. It is equivalent + to 2^128 calls to next(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. */ + +void xoshiro256starstar_jump(xoshiro256starstar_state *state) +{ + int i, b; + static const uint64_t JUMP[] = {0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c}; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) + { + if (JUMP[i] & UINT64_C(1) << b) + { + s0 ^= state->s[0]; + s1 ^= state->s[1]; + s2 ^= state->s[2]; + s3 ^= state->s[3]; + } + xoshiro256starstar_next(&state->s[0]); + } + + state->s[0] = s0; + state->s[1] = s1; + state->s[2] = s2; + state->s[3] = s3; +} diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h new file mode 100644 index 000000000..1d7d8ea40 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.h @@ -0,0 +1,63 @@ +#ifndef _RANDOMDGEN__XOSHIRO256STARSTAR_H_ +#define _RANDOMDGEN__XOSHIRO256STARSTAR_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif + +typedef struct s_xoshiro256starstar_state { + uint64_t s[4]; + int has_uint32; + uint32_t uinteger; +} xoshiro256starstar_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoshiro256starstar_next(uint64_t *s) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + const uint64_t t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + + s[3] = rotl(s[3], 45); + + return result_starstar; +} + +static INLINE uint64_t +xoshiro256starstar_next64(xoshiro256starstar_state *state) { + return xoshiro256starstar_next(&state->s[0]); +} + +static INLINE uint32_t +xoshiro256starstar_next32(xoshiro256starstar_state *state) { + uint64_t next; + if (state->has_uint32) { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoshiro256starstar_next(&state->s[0]); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoshiro256starstar_jump(xoshiro256starstar_state *state); + +#endif diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c new file mode 100644 index 000000000..ecf87bab9 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.c @@ -0,0 +1,103 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include <stdint.h> + +/* This is xoshiro256** 1.0, our all-purpose, rock-solid generator. It has + excellent (sub-ns) speed, a state (256 bits) that is large enough for + any parallel application, and it passes all tests we are aware of. + + For generating just floating-point numbers, xoshiro256+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +uint64_t s[4]; + +uint64_t next(void) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 17; + + s[2] ^= s[0]; + s[3] ^= s[1]; + s[1] ^= s[2]; + s[0] ^= s[3]; + + s[2] ^= t; + + s[3] = rotl(s[3], 45); + + return result_starstar; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^128 calls to next(); it can be used to generate 2^128 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x180ec6d33cfd0aba, 0xd5a61266f0c9392c, 0xa9582618e03fc9aa, 0x39abdc4529b1661c }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +} + + + +/* This is the long-jump function for the generator. It is equivalent to + 2^192 calls to next(); it can be used to generate 2^64 starting points, + from each of which jump() will generate 2^64 non-overlapping + subsequences for parallel distributed computations. */ + +void long_jump(void) { + static const uint64_t LONG_JUMP[] = { 0x76e15d3efefdcbbf, 0xc5004e441c522fb3, 0x77710069854ee241, 0x39109bb02acbe635 }; + + uint64_t s0 = 0; + uint64_t s1 = 0; + uint64_t s2 = 0; + uint64_t s3 = 0; + for(int i = 0; i < sizeof LONG_JUMP / sizeof *LONG_JUMP; i++) + for(int b = 0; b < 64; b++) { + if (LONG_JUMP[i] & UINT64_C(1) << b) { + s0 ^= s[0]; + s1 ^= s[1]; + s2 ^= s[2]; + s3 ^= s[3]; + } + next(); + } + + s[0] = s0; + s[1] = s1; + s[2] = s2; + s[3] = s3; +}
\ No newline at end of file diff --git a/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h new file mode 100644 index 000000000..3aa788ec9 --- /dev/null +++ b/numpy/random/src/xoshiro256starstar/xoshiro256starstar.orig.h @@ -0,0 +1,5 @@ +#include <stdint.h> + +uint64_t s[4]; +uint64_t next(void); +void jump(void); diff --git a/numpy/random/src/xoshiro512starstar/LICENSE.md b/numpy/random/src/xoshiro512starstar/LICENSE.md new file mode 100644 index 000000000..aa34c1966 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/LICENSE.md @@ -0,0 +1,9 @@ +# XOSHIRO512STARSTAR + +Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c new file mode 100644 index 000000000..bcc3574e4 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar-test-data-gen.c @@ -0,0 +1,72 @@ +/* + * Generate testing csv files + * + * cl xoshiro512starstar-test-data-gen.c xoshiro512starstar.orig.c / + * ../splitmix64/splitmix64.c /Ox + * xoshiro512starstar-test-data-gen.exe * + * + * gcc xoshiro512starstar-test-data-gen.c xoshiro512starstar.orig.c / + * ../splitmix64/splitmix64.c -o xoshiro512starstar-test-data-gen + * ./xoshiro512starstar-test-data-gen + * + * Requres the Random123 directory containing header files to be located in the + * same directory (not included). + * + */ + +#include "../splitmix64/splitmix64.h" +#include "xoshiro512starstar.orig.h" +#include <inttypes.h> +#include <stdio.h> + +#define N 1000 + +int main() { + uint64_t sum = 0; + uint64_t state, seed = 0xDEADBEAF; + state = seed; + int i; + for (i = 0; i < 8; i++) { + s[i] = splitmix64_next(&state); + } + uint64_t store[N]; + for (i = 0; i < N; i++) { + store[i] = next(); + } + + FILE *fp; + fp = fopen("xoshiro512starstar-testset-1.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); + + seed = state = 0; + for (i = 0; i < 8; i++) { + s[i] = splitmix64_next(&state); + } + for (i = 0; i < N; i++) { + store[i] = next(); + } + fp = fopen("xoshiro512starstar-testset-2.csv", "w"); + if (fp == NULL) { + printf("Couldn't open file\n"); + return -1; + } + fprintf(fp, "seed, 0x%" PRIx64 "\n", seed); + for (i = 0; i < N; i++) { + fprintf(fp, "%d, 0x%" PRIx64 "\n", i, store[i]); + if (i == 999) { + printf("%d, 0x%" PRIx64 "\n", i, store[i]); + } + } + fclose(fp); +} diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c new file mode 100644 index 000000000..a9f56699f --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.c @@ -0,0 +1,53 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include "xoshiro512starstar.h" + +/* This is xoshiro512** 1.0, an all-purpose, rock-solid generator. It has + excellent (about 1ns) speed, an increased state (512 bits) that is + large enough for any parallel application, and it passes all tests we + are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +extern INLINE uint64_t +xoshiro512starstar_next64(xoshiro512starstar_state *state); + +extern INLINE uint32_t +xoshiro512starstar_next32(xoshiro512starstar_state *state); + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +static uint64_t s_placeholder[8]; + +void xoshiro512starstar_jump(xoshiro512starstar_state *state) { + + int i, b, w; + static const uint64_t JUMP[] = {0x33ed89b6e7a353f9, 0x760083d7955323be, + 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, + 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, + 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db}; + + uint64_t t[sizeof s_placeholder / sizeof *s_placeholder]; + memset(t, 0, sizeof t); + for (i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for (b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for (w = 0; w < sizeof s_placeholder / sizeof *s_placeholder; w++) + t[w] ^= state->s[w]; + xoshiro512starstar_next(&state->s[0]); + } + + memcpy(state->s, t, sizeof s_placeholder); +} diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h new file mode 100644 index 000000000..0fa0ba3cd --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.h @@ -0,0 +1,75 @@ +#ifndef _RANDOMDGEN__XOSHIRO512STARSTAR_H_ +#define _RANDOMDGEN__XOSHIRO512STARSTAR_H_ + +#ifdef _WIN32 +#if _MSC_VER == 1500 +#include "../common/inttypes.h" +#define INLINE __forceinline +#else +#include <inttypes.h> +#define INLINE __inline __forceinline +#endif +#else +#include <inttypes.h> +#define INLINE inline +#endif +#include <string.h> + +typedef struct s_xoshiro512starstar_state +{ + uint64_t s[8]; + int has_uint32; + uint32_t uinteger; +} xoshiro512starstar_state; + +static INLINE uint64_t rotl(const uint64_t x, int k) +{ + return (x << k) | (x >> (64 - k)); +} + +static INLINE uint64_t xoshiro512starstar_next(uint64_t *s) +{ + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result_starstar; +} + +static INLINE uint64_t +xoshiro512starstar_next64(xoshiro512starstar_state *state) +{ + return xoshiro512starstar_next(&state->s[0]); +} + +static INLINE uint32_t +xoshiro512starstar_next32(xoshiro512starstar_state *state) +{ + uint64_t next; + if (state->has_uint32) + { + state->has_uint32 = 0; + return state->uinteger; + } + next = xoshiro512starstar_next(&state->s[0]); + state->has_uint32 = 1; + state->uinteger = (uint32_t)(next >> 32); + return (uint32_t)(next & 0xffffffff); +} + +void xoshiro512starstar_jump(xoshiro512starstar_state *state); + +#endif diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c new file mode 100644 index 000000000..0cf884edb --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.c @@ -0,0 +1,67 @@ +/* Written in 2018 by David Blackman and Sebastiano Vigna (vigna@acm.org) + +To the extent possible under law, the author has dedicated all copyright +and related and neighboring rights to this software to the public domain +worldwide. This software is distributed without any warranty. + +See <http://creativecommons.org/publicdomain/zero/1.0/>. */ + +#include "xoshiro512starstar.orig.h" + +/* This is xoshiro512** 1.0, an all-purpose, rock-solid generator. It has + excellent (about 1ns) speed, an increased state (512 bits) that is + large enough for any parallel application, and it passes all tests we + are aware of. + + For generating just floating-point numbers, xoshiro512+ is even faster. + + The state must be seeded so that it is not everywhere zero. If you have + a 64-bit seed, we suggest to seed a splitmix64 generator and use its + output to fill s. */ + +static inline uint64_t rotl(const uint64_t x, int k) { + return (x << k) | (x >> (64 - k)); +} + + +uint64_t next(void) { + const uint64_t result_starstar = rotl(s[1] * 5, 7) * 9; + + const uint64_t t = s[1] << 11; + + s[2] ^= s[0]; + s[5] ^= s[1]; + s[1] ^= s[2]; + s[7] ^= s[3]; + s[3] ^= s[4]; + s[4] ^= s[5]; + s[0] ^= s[6]; + s[6] ^= s[7]; + + s[6] ^= t; + + s[7] = rotl(s[7], 21); + + return result_starstar; +} + + +/* This is the jump function for the generator. It is equivalent + to 2^256 calls to next(); it can be used to generate 2^256 + non-overlapping subsequences for parallel computations. */ + +void jump(void) { + static const uint64_t JUMP[] = { 0x33ed89b6e7a353f9, 0x760083d7955323be, 0x2837f2fbb5f22fae, 0x4b8c5674d309511c, 0xb11ac47a7ba28c25, 0xf1be7667092bcc1c, 0x53851efdb6df0aaf, 0x1ebbc8b23eaf25db }; + + uint64_t t[sizeof s / sizeof *s]; + memset(t, 0, sizeof t); + for(int i = 0; i < sizeof JUMP / sizeof *JUMP; i++) + for(int b = 0; b < 64; b++) { + if (JUMP[i] & UINT64_C(1) << b) + for(int w = 0; w < sizeof s / sizeof *s; w++) + t[w] ^= s[w]; + next(); + } + + memcpy(s, t, sizeof s); +}
\ No newline at end of file diff --git a/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h new file mode 100644 index 000000000..0b7892473 --- /dev/null +++ b/numpy/random/src/xoshiro512starstar/xoshiro512starstar.orig.h @@ -0,0 +1,6 @@ +#include <stdint.h> +#include <string.h> + +uint64_t s[8]; +uint64_t next(void); +void jump(void); |
