diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/code_generators/generate_umath.py | 1 | ||||
-rw-r--r-- | numpy/core/include/numpy/npy_math.h | 47 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 20 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.h.src | 6 | ||||
-rw-r--r-- | numpy/core/src/umath/npy_simd_data.h | 137 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 187 | ||||
-rw-r--r-- | numpy/core/tests/data/umath-validation-set-exp | 277 | ||||
-rw-r--r-- | numpy/core/tests/test_umath.py | 12 | ||||
-rw-r--r-- | numpy/core/tests/test_umath_accuracy.py | 17 |
9 files changed, 651 insertions, 53 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index c14711d16..52ae3cdd7 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -702,6 +702,7 @@ defdict = { None, TD('e', f='exp', astype={'e':'f'}), TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]), + TD('d', simd=[('avx512f', 'd')]), TD('fdg' + cmplx, f='exp'), TD(P, f='exp'), ), diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 69e690f28..a07f49501 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -114,53 +114,6 @@ NPY_INLINE static float __npy_nzerof(void) #define NPY_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */ /* - * Constants used in vector implementation of exp(x) - */ -#define NPY_RINT_CVT_MAGICf 0x1.800000p+23f -#define NPY_CODY_WAITE_LOGE_2_HIGHf -6.93145752e-1f -#define NPY_CODY_WAITE_LOGE_2_LOWf -1.42860677e-6f -#define NPY_COEFF_P0_EXPf 9.999999999980870924916e-01f -#define NPY_COEFF_P1_EXPf 7.257664613233124478488e-01f -#define NPY_COEFF_P2_EXPf 2.473615434895520810817e-01f -#define NPY_COEFF_P3_EXPf 5.114512081637298353406e-02f -#define NPY_COEFF_P4_EXPf 6.757896990527504603057e-03f -#define NPY_COEFF_P5_EXPf 5.082762527590693718096e-04f -#define NPY_COEFF_Q0_EXPf 1.000000000000000000000e+00f -#define NPY_COEFF_Q1_EXPf -2.742335390411667452936e-01f -#define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f - -/* - * Constants used in vector implementation of log(x) - */ -#define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f -#define NPY_COEFF_P1_LOGf 9.999999999999998702752e-01f -#define NPY_COEFF_P2_LOGf 2.112677543073053063722e+00f -#define NPY_COEFF_P3_LOGf 1.480000633576506585156e+00f -#define NPY_COEFF_P4_LOGf 3.808837741388407920751e-01f -#define NPY_COEFF_P5_LOGf 2.589979117907922693523e-02f -#define NPY_COEFF_Q0_LOGf 1.000000000000000000000e+00f -#define NPY_COEFF_Q1_LOGf 2.612677543073109236779e+00f -#define NPY_COEFF_Q2_LOGf 2.453006071784736363091e+00f -#define NPY_COEFF_Q3_LOGf 9.864942958519418960339e-01f -#define NPY_COEFF_Q4_LOGf 1.546476374983906719538e-01f -#define NPY_COEFF_Q5_LOGf 5.875095403124574342950e-03f -/* - * Constants used in vector implementation of sinf/cosf(x) - */ -#define NPY_TWO_O_PIf 0x1.45f306p-1f -#define NPY_CODY_WAITE_PI_O_2_HIGHf -0x1.921fb0p+00f -#define NPY_CODY_WAITE_PI_O_2_MEDf -0x1.5110b4p-22f -#define NPY_CODY_WAITE_PI_O_2_LOWf -0x1.846988p-48f -#define NPY_COEFF_INVF0_COSINEf 0x1.000000p+00f -#define NPY_COEFF_INVF2_COSINEf -0x1.000000p-01f -#define NPY_COEFF_INVF4_COSINEf 0x1.55553cp-05f -#define NPY_COEFF_INVF6_COSINEf -0x1.6c06dcp-10f -#define NPY_COEFF_INVF8_COSINEf 0x1.98e616p-16f -#define NPY_COEFF_INVF3_SINEf -0x1.555556p-03f -#define NPY_COEFF_INVF5_SINEf 0x1.11119ap-07f -#define NPY_COEFF_INVF7_SINEf -0x1.a06bbap-13f -#define NPY_COEFF_INVF9_SINEf 0x1.7d3bbcp-19f -/* * Integer functions. */ NPY_INPLACE npy_uint npy_gcdu(npy_uint a, npy_uint b); diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 9b43824cb..eea82309c 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1558,6 +1558,15 @@ FLOAT_@func@(char **args, npy_intp const *dimensions, npy_intp const *steps, voi /**end repeat**/ +NPY_NO_EXPORT NPY_GCC_OPT_3 void +DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const npy_double in1 = *(npy_double *)ip1; + *(npy_double *)op1 = npy_exp(in1); + } +} + /**begin repeat * #isa = avx512f, fma# * #ISA = AVX512F, FMA# @@ -1688,6 +1697,17 @@ FLOAT_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *step /**end repeat1**/ /**end repeat**/ +NPY_NO_EXPORT NPY_GCC_OPT_3 void +DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) +{ + if (!run_unary_avx512f_exp_DOUBLE(args, dimensions, steps)) { + UNARY_LOOP { + const npy_double in1 = *(npy_double *)ip1; + *(npy_double *)op1 = npy_exp(in1); + } + } +} + /**begin repeat * Float types diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index e9d0b4c62..50a7ccfee 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -196,6 +196,12 @@ NPY_NO_EXPORT void /**end repeat1**/ /**end repeat**/ +NPY_NO_EXPORT void +DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); + /**begin repeat * #func = sin, cos, exp, log# */ diff --git a/numpy/core/src/umath/npy_simd_data.h b/numpy/core/src/umath/npy_simd_data.h new file mode 100644 index 000000000..36c8b6c03 --- /dev/null +++ b/numpy/core/src/umath/npy_simd_data.h @@ -0,0 +1,137 @@ +#ifndef __NPY_SIMD_DATA_H_ +#define __NPY_SIMD_DATA_H_ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS +/* + * Constants used in vector implementation of float64 exp(x) + */ +#define NPY_RINT_CVT_MAGIC 0x1.8p52 +#define NPY_INV_LN2_MUL_32 0x1.71547652b82fep+5 +#define NPY_TANG_NEG_L1 -0x1.62e42fefp-6 +#define NPY_TANG_NEG_L2 -0x1.473de6af278edp-39 +#define NPY_TANG_A1 0x1p-1 +#define NPY_TANG_A2 0x1.5555555548f7cp-3 +#define NPY_TANG_A3 0x1.5555555545d4ep-5 +#define NPY_TANG_A4 0x1.11115b7aa905ep-7 +#define NPY_TANG_A5 0x1.6c1728d739765p-10 + +/* Lookup table for 2^(j/32) */ +static npy_uint64 EXP_Table_top[32] = { + 0x3FF0000000000000, + 0x3FF059B0D3158540, + 0x3FF0B5586CF98900, + 0x3FF11301D0125B40, + 0x3FF172B83C7D5140, + 0x3FF1D4873168B980, + 0x3FF2387A6E756200, + 0x3FF29E9DF51FDEC0, + 0x3FF306FE0A31B700, + 0x3FF371A7373AA9C0, + 0x3FF3DEA64C123400, + 0x3FF44E0860618900, + 0x3FF4BFDAD5362A00, + 0x3FF5342B569D4F80, + 0x3FF5AB07DD485400, + 0x3FF6247EB03A5580, + 0x3FF6A09E667F3BC0, + 0x3FF71F75E8EC5F40, + 0x3FF7A11473EB0180, + 0x3FF82589994CCE00, + 0x3FF8ACE5422AA0C0, + 0x3FF93737B0CDC5C0, + 0x3FF9C49182A3F080, + 0x3FFA5503B23E2540, + 0x3FFAE89F995AD380, + 0x3FFB7F76F2FB5E40, + 0x3FFC199BDD855280, + 0x3FFCB720DCEF9040, + 0x3FFD5818DCFBA480, + 0x3FFDFC97337B9B40, + 0x3FFEA4AFA2A490C0, + 0x3FFF50765B6E4540, +}; + +static npy_uint64 EXP_Table_tail[32] = { + 0x0000000000000000, + 0x3D0A1D73E2A475B4, + 0x3CEEC5317256E308, + 0x3CF0A4EBBF1AED93, + 0x3D0D6E6FBE462876, + 0x3D053C02DC0144C8, + 0x3D0C3360FD6D8E0B, + 0x3D009612E8AFAD12, + 0x3CF52DE8D5A46306, + 0x3CE54E28AA05E8A9, + 0x3D011ADA0911F09F, + 0x3D068189B7A04EF8, + 0x3D038EA1CBD7F621, + 0x3CBDF0A83C49D86A, + 0x3D04AC64980A8C8F, + 0x3CD2C7C3E81BF4B7, + 0x3CE921165F626CDD, + 0x3D09EE91B8797785, + 0x3CDB5F54408FDB37, + 0x3CF28ACF88AFAB35, + 0x3CFB5BA7C55A192D, + 0x3D027A280E1F92A0, + 0x3CF01C7C46B071F3, + 0x3CFC8B424491CAF8, + 0x3D06AF439A68BB99, + 0x3CDBAA9EC206AD4F, + 0x3CFC2220CB12A092, + 0x3D048A81E5E8F4A5, + 0x3CDC976816BAD9B8, + 0x3CFEB968CAC39ED3, + 0x3CF9858F73A18F5E, + 0x3C99D3E12DD8A18B, +}; +#endif + +/* + * Constants used in vector implementation of exp(x) + */ +#define NPY_RINT_CVT_MAGICf 0x1.800000p+23f +#define NPY_CODY_WAITE_LOGE_2_HIGHf -6.93145752e-1f +#define NPY_CODY_WAITE_LOGE_2_LOWf -1.42860677e-6f +#define NPY_COEFF_P0_EXPf 9.999999999980870924916e-01f +#define NPY_COEFF_P1_EXPf 7.257664613233124478488e-01f +#define NPY_COEFF_P2_EXPf 2.473615434895520810817e-01f +#define NPY_COEFF_P3_EXPf 5.114512081637298353406e-02f +#define NPY_COEFF_P4_EXPf 6.757896990527504603057e-03f +#define NPY_COEFF_P5_EXPf 5.082762527590693718096e-04f +#define NPY_COEFF_Q0_EXPf 1.000000000000000000000e+00f +#define NPY_COEFF_Q1_EXPf -2.742335390411667452936e-01f +#define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f + +/* + * Constants used in vector implementation of log(x) + */ +#define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f +#define NPY_COEFF_P1_LOGf 9.999999999999998702752e-01f +#define NPY_COEFF_P2_LOGf 2.112677543073053063722e+00f +#define NPY_COEFF_P3_LOGf 1.480000633576506585156e+00f +#define NPY_COEFF_P4_LOGf 3.808837741388407920751e-01f +#define NPY_COEFF_P5_LOGf 2.589979117907922693523e-02f +#define NPY_COEFF_Q0_LOGf 1.000000000000000000000e+00f +#define NPY_COEFF_Q1_LOGf 2.612677543073109236779e+00f +#define NPY_COEFF_Q2_LOGf 2.453006071784736363091e+00f +#define NPY_COEFF_Q3_LOGf 9.864942958519418960339e-01f +#define NPY_COEFF_Q4_LOGf 1.546476374983906719538e-01f +#define NPY_COEFF_Q5_LOGf 5.875095403124574342950e-03f +/* + * Constants used in vector implementation of sinf/cosf(x) + */ +#define NPY_TWO_O_PIf 0x1.45f306p-1f +#define NPY_CODY_WAITE_PI_O_2_HIGHf -0x1.921fb0p+00f +#define NPY_CODY_WAITE_PI_O_2_MEDf -0x1.5110b4p-22f +#define NPY_CODY_WAITE_PI_O_2_LOWf -0x1.846988p-48f +#define NPY_COEFF_INVF0_COSINEf 0x1.000000p+00f +#define NPY_COEFF_INVF2_COSINEf -0x1.000000p-01f +#define NPY_COEFF_INVF4_COSINEf 0x1.55553cp-05f +#define NPY_COEFF_INVF6_COSINEf -0x1.6c06dcp-10f +#define NPY_COEFF_INVF8_COSINEf 0x1.98e616p-16f +#define NPY_COEFF_INVF3_SINEf -0x1.555556p-03f +#define NPY_COEFF_INVF5_SINEf 0x1.11119ap-07f +#define NPY_COEFF_INVF7_SINEf -0x1.a06bbap-13f +#define NPY_COEFF_INVF9_SINEf 0x1.7d3bbcp-19f + +#endif diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 4265476b5..106c7e7c9 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -18,6 +18,7 @@ #include "lowlevel_strided_loops.h" #include "numpy/npy_common.h" #include "numpy/npy_math.h" +#include "npy_simd_data.h" #ifdef NPY_HAVE_SSE2_INTRINSICS #include <emmintrin.h> #if !defined(_MSC_VER) || _MSC_VER >= 1600 @@ -387,6 +388,25 @@ run_unary_@isa@_sincos_FLOAT(char **args, npy_intp const *dimensions, npy_intp c /**end repeat**/ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS +static NPY_INLINE void +AVX512F_exp_DOUBLE(npy_double *, npy_double *, const npy_intp n, const npy_intp stride); +#endif +static NPY_INLINE int +run_unary_avx512f_exp_DOUBLE(char **args, npy_intp const *dimensions, npy_intp const *steps) +{ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS +#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1))) + if (IS_OUTPUT_BLOCKABLE_UNARY(sizeof(npy_double), 64)) { + AVX512F_exp_DOUBLE((npy_double*)args[1], (npy_double*)args[0], dimensions[0], steps[0]); + return 1; + } + else + return 0; +#endif +#endif + return 0; +} /**begin repeat * Float types @@ -1695,6 +1715,22 @@ avx512_scalef_ps(__m512 poly, __m512 quadrant) { return _mm512_scalef_ps(poly, quadrant); } + +static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d +avx512_permute_x4var_pd(__m512d t0, + __m512d t1, + __m512d t2, + __m512d t3, + __m512i index) +{ + + __mmask8 lut_mask = _mm512_cmp_epi64_mask(index, _mm512_set1_epi64(15), + _MM_CMPINT_GT); + __m512d res1 = _mm512_permutex2var_pd(t0, index, t1); + __m512d res2 = _mm512_permutex2var_pd(t2, index, t3); + return _mm512_mask_blend_pd(lut_mask, res1, res2); +} + /**begin repeat * #vsub = ps, pd# * #type= npy_float, npy_double# @@ -2654,6 +2690,157 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void #endif /**end repeat**/ +/* + * Vectorized implementation of exp double using AVX512 + * Reference: Tang, P.T.P., "Table-driven implementation of the + * exponential function in IEEE floating-point + * arithmetic," ACM Transactions on Mathematical + * Software, vol. 15, pp. 144-157, 1989. + * 1) if x > mTH_max or x is INF; return INF (overflow) + * 2) if x < mTH_min; return 0.0f (underflow) + * 3) if abs(x) < mTH_nearzero; return 1.0f + x + * 4) if x is Nan; return Nan + * 5) Range reduction: + * x = (32m + j)ln2 / 32 + r; r in [-ln2/64, ln2/64] + * 6) exp(r) - 1 is approximated by a polynomial function p(r) + * exp(x) = 2^m(2^(j/32) + 2^(j/32)p(r)); + */ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS +#if !(defined(__clang__) && (__clang_major__ < 10 || (__clang_major__ == 10 && __clang_minor__ < 1))) +static NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F void +AVX512F_exp_DOUBLE(npy_double * op, + npy_double * ip, + const npy_intp array_size, + const npy_intp steps) +{ + npy_intp num_remaining_elements = array_size; + const npy_intp stride = steps / (npy_intp)sizeof(npy_double); + const npy_int num_lanes = 64 / (npy_intp)sizeof(npy_double); + npy_int32 indexarr[8]; + for (npy_int32 ii = 0; ii < 8; ii++) { + indexarr[ii] = ii*stride; + } + + __m512d InvLn2N = _mm512_set1_pd(NPY_INV_LN2_MUL_32); + __m512d mShift = _mm512_set1_pd(NPY_RINT_CVT_MAGIC); + __m512d mNegL1 = _mm512_set1_pd(NPY_TANG_NEG_L1); + __m512d mNegL2 = _mm512_set1_pd(NPY_TANG_NEG_L2); + __m512i mMod = _mm512_set1_epi64(0x1f); + __m512d mA1 = _mm512_set1_pd(NPY_TANG_A1); + __m512d mA2 = _mm512_set1_pd(NPY_TANG_A2); + __m512d mA3 = _mm512_set1_pd(NPY_TANG_A3); + __m512d mA4 = _mm512_set1_pd(NPY_TANG_A4); + __m512d mA5 = _mm512_set1_pd(NPY_TANG_A5); + __m512d mTH_nearzero = _mm512_set1_pd(0x1p-54); + __m512d mTH_max = _mm512_set1_pd(0x1.62e42fefa39efp+9); + __m512d mTH_min = _mm512_set1_pd(-0x1.74910d52d3053p+9); + __m512d mTH_inf = _mm512_set1_pd(NPY_INFINITY); + __m512d zeros_d = _mm512_set1_pd(0.0f); + __m512d ones_d = _mm512_set1_pd(1.0f); + __m256i vindex = _mm256_loadu_si256((__m256i*)&indexarr[0]); + + __m512d mTable_top_0 = _mm512_loadu_pd(&(EXP_Table_top[8*0])); + __m512d mTable_top_1 = _mm512_loadu_pd(&(EXP_Table_top[8*1])); + __m512d mTable_top_2 = _mm512_loadu_pd(&(EXP_Table_top[8*2])); + __m512d mTable_top_3 = _mm512_loadu_pd(&(EXP_Table_top[8*3])); + __m512d mTable_tail_0 = _mm512_loadu_pd(&(EXP_Table_tail[8*0])); + __m512d mTable_tail_1 = _mm512_loadu_pd(&(EXP_Table_tail[8*1])); + __m512d mTable_tail_2 = _mm512_loadu_pd(&(EXP_Table_tail[8*2])); + __m512d mTable_tail_3 = _mm512_loadu_pd(&(EXP_Table_tail[8*3])); + + __mmask8 overflow_mask = avx512_get_partial_load_mask_pd(0, num_lanes); + __mmask8 load_mask = avx512_get_full_load_mask_pd(); + __mmask8 xmin_mask, xmax_mask, inf_mask, nan_mask, nearzero_mask; + + while (num_remaining_elements > 0) { + if (num_remaining_elements < num_lanes) { + load_mask = avx512_get_partial_load_mask_pd(num_remaining_elements, + num_lanes); + } + + __m512d x; + if (1 == stride) { + x = avx512_masked_load_pd(load_mask, ip); + } + else { + x = avx512_masked_gather_pd(zeros_d, ip, vindex, load_mask); + } + + nan_mask = _mm512_cmp_pd_mask(x, x, _CMP_NEQ_UQ); + x = avx512_set_masked_lanes_pd(x, zeros_d, nan_mask); + xmax_mask = _mm512_cmp_pd_mask(x, mTH_max, _CMP_GT_OQ); + xmin_mask = _mm512_cmp_pd_mask(x, mTH_min, _CMP_LT_OQ); + inf_mask = _mm512_cmp_pd_mask(x, mTH_inf, _CMP_EQ_OQ); + __m512i x_abs = _mm512_and_epi64(_mm512_castpd_si512(x), + _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF)); + nearzero_mask = _mm512_cmp_pd_mask(_mm512_castsi512_pd(x_abs), + mTH_nearzero, _CMP_LT_OQ); + nearzero_mask = _mm512_kxor(nearzero_mask, nan_mask); + overflow_mask = _mm512_kor(overflow_mask, + _mm512_kxor(xmax_mask, inf_mask)); + x = avx512_set_masked_lanes_pd(x, zeros_d, + _mm512_kor(_mm512_kor(nan_mask, xmin_mask), + _mm512_kor(xmax_mask, nearzero_mask))); + + /* z = x * 32/ln2 */ + __m512d z = _mm512_mul_pd(x, InvLn2N); + + /* round to nearest */ + __m512d kd = _mm512_add_pd(z, mShift); + __m512i ki = _mm512_castpd_si512(kd); + kd = _mm512_sub_pd(kd, mShift); + + /* r = (x + kd*mNegL1) + kd*mNegL2 */ + __m512d r1 = _mm512_fmadd_pd(kd, mNegL1, x); + __m512d r2 = _mm512_mul_pd(kd, mNegL2); + __m512d r = _mm512_add_pd(r1,r2); + + /* Polynomial approximation for exp(r) - 1 */ + __m512d q = _mm512_fmadd_pd(mA5, r, mA4); + q = _mm512_fmadd_pd(q, r, mA3); + q = _mm512_fmadd_pd(q, r, mA2); + q = _mm512_fmadd_pd(q, r, mA1); + q = _mm512_mul_pd(q, r); + __m512d p = _mm512_fmadd_pd(r, q, r2);; + p = _mm512_add_pd(r1, p); + + /* Get 2^(j/32) from lookup table */ + __m512i j = _mm512_and_epi64(ki, mMod); + __m512d top = avx512_permute_x4var_pd(mTable_top_0, mTable_top_1, + mTable_top_2, mTable_top_3, j); + __m512d tail = avx512_permute_x4var_pd(mTable_tail_0, mTable_tail_1, + mTable_tail_2, mTable_tail_3, j); + + /* + * s = top + tail; + * exp(x) = 2^m * (top + (tail + s * p)); + */ + __m512d s = _mm512_add_pd(top, tail); + __m512d res = _mm512_fmadd_pd(s, p, tail); + res = _mm512_add_pd(res, top); + res= _mm512_scalef_pd(res, _mm512_div_pd(kd, _mm512_set1_pd(32))); + + /* return special cases */ + res = avx512_set_masked_lanes_pd(res, _mm512_add_pd(x, ones_d), + nearzero_mask); + res = avx512_set_masked_lanes_pd(res, _mm512_set1_pd(NPY_NAN), + nan_mask); + res = avx512_set_masked_lanes_pd(res, mTH_inf, xmax_mask); + res = avx512_set_masked_lanes_pd(res, zeros_d, xmin_mask); + + _mm512_mask_storeu_pd(op, load_mask, res); + + ip += num_lanes * stride; + op += num_lanes; + num_remaining_elements -= num_lanes; + } + if (overflow_mask) { + npy_set_floatstatus_overflow(); + } +} +#endif +#endif + /**begin repeat * #TYPE = CFLOAT, CDOUBLE# * #type = npy_float, npy_double# diff --git a/numpy/core/tests/data/umath-validation-set-exp b/numpy/core/tests/data/umath-validation-set-exp index 1b2cc9ce4..7c5ef3b33 100644 --- a/numpy/core/tests/data/umath-validation-set-exp +++ b/numpy/core/tests/data/umath-validation-set-exp @@ -133,3 +133,280 @@ np.float32,0xc29b43d5,0x077ffffc,3 np.float32,0xc1e61ff7,0x2ab504f5,3 np.float32,0xc2867878,0x0effff15,3 np.float32,0xc2a2324a,0x04fffff4,3 +#float64 +## near zero ## +np.float64,0x8000000000000000,0x3ff0000000000000,1 +np.float64,0x8010000000000000,0x3ff0000000000000,1 +np.float64,0x8000000000000001,0x3ff0000000000000,1 +np.float64,0x8360000000000000,0x3ff0000000000000,1 +np.float64,0x9a70000000000000,0x3ff0000000000000,1 +np.float64,0xb9b0000000000000,0x3ff0000000000000,1 +np.float64,0xb810000000000000,0x3ff0000000000000,1 +np.float64,0xbc30000000000000,0x3ff0000000000000,1 +np.float64,0xb6a0000000000000,0x3ff0000000000000,1 +np.float64,0x0000000000000000,0x3ff0000000000000,1 +np.float64,0x0010000000000000,0x3ff0000000000000,1 +np.float64,0x0000000000000001,0x3ff0000000000000,1 +np.float64,0x0360000000000000,0x3ff0000000000000,1 +np.float64,0x1a70000000000000,0x3ff0000000000000,1 +np.float64,0x3c30000000000000,0x3ff0000000000000,1 +np.float64,0x36a0000000000000,0x3ff0000000000000,1 +np.float64,0x39b0000000000000,0x3ff0000000000000,1 +np.float64,0x3810000000000000,0x3ff0000000000000,1 +## underflow ## +np.float64,0xc0c6276800000000,0x0000000000000000,1 +np.float64,0xc0c62d918ce2421d,0x0000000000000000,1 +np.float64,0xc0c62d918ce2421e,0x0000000000000000,1 +np.float64,0xc0c62d91a0000000,0x0000000000000000,1 +np.float64,0xc0c62d9180000000,0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e06,0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e07,0x0000000000000000,1 +np.float64,0xc0c62dea40000000,0x0000000000000000,1 +np.float64,0xc0c62dea60000000,0x0000000000000000,1 +np.float64,0xc0875f1120000000,0x0000000000000000,1 +np.float64,0xc0875f113c30b1c8,0x0000000000000000,1 +np.float64,0xc0875f1140000000,0x0000000000000000,1 +np.float64,0xc093480000000000,0x0000000000000000,1 +np.float64,0xffefffffffffffff,0x0000000000000000,1 +np.float64,0xc7efffffe0000000,0x0000000000000000,1 +## overflow ## +np.float64,0x40862e52fefa39ef,0x7ff0000000000000,1 +np.float64,0x40872e42fefa39ef,0x7ff0000000000000,1 +## +/- INF, +/- NAN ## +np.float64,0x7ff0000000000000,0x7ff0000000000000,1 +np.float64,0xfff0000000000000,0x0000000000000000,1 +np.float64,0x7ff8000000000000,0x7ff8000000000000,1 +np.float64,0xfff8000000000000,0xfff8000000000000,1 +## output denormal ## +np.float64,0xc087438520000000,0x0000000000000001,1 +np.float64,0xc08743853f2f4461,0x0000000000000001,1 +np.float64,0xc08743853f2f4460,0x0000000000000001,1 +np.float64,0xc087438540000000,0x0000000000000001,1 +## between -745.13321910 and 709.78271289 ## +np.float64,0xbff760cd14774bd9,0x3fcdb14ced00ceb6,1 +np.float64,0xbff760cd20000000,0x3fcdb14cd7993879,1 +np.float64,0xbff760cd00000000,0x3fcdb14d12fbd264,1 +np.float64,0xc07f1cf360000000,0x130c1b369af14fda,1 +np.float64,0xbeb0000000000000,0x3feffffe00001000,1 +np.float64,0xbd70000000000000,0x3fefffffffffe000,1 +np.float64,0xc084fd46e5c84952,0x0360000000000139,1 +np.float64,0xc084fd46e5c84953,0x035ffffffffffe71,1 +np.float64,0xc084fd46e0000000,0x0360000b9096d32c,1 +np.float64,0xc084fd4700000000,0x035fff9721d12104,1 +np.float64,0xc086232bc0000000,0x0010003af5e64635,1 +np.float64,0xc086232bdd7abcd2,0x001000000000007c,1 +np.float64,0xc086232bdd7abcd3,0x000ffffffffffe7c,1 +np.float64,0xc086232be0000000,0x000ffffaf57a6fc9,1 +np.float64,0xc086233920000000,0x000fe590e3b45eb0,1 +np.float64,0xc086233938000000,0x000fe56133493c57,1 +np.float64,0xc086233940000000,0x000fe5514deffbbc,1 +np.float64,0xc086234c98000000,0x000fbf1024c32ccb,1 +np.float64,0xc086234ca0000000,0x000fbf0065bae78d,1 +np.float64,0xc086234c80000000,0x000fbf3f623a7724,1 +np.float64,0xc086234ec0000000,0x000fbad237c846f9,1 +np.float64,0xc086234ec8000000,0x000fbac27cfdec97,1 +np.float64,0xc086234ee0000000,0x000fba934cfd3dc2,1 +np.float64,0xc086234ef0000000,0x000fba73d7f618d9,1 +np.float64,0xc086234f00000000,0x000fba54632dddc0,1 +np.float64,0xc0862356e0000000,0x000faae0945b761a,1 +np.float64,0xc0862356f0000000,0x000faac13eb9a310,1 +np.float64,0xc086235700000000,0x000faaa1e9567b0a,1 +np.float64,0xc086236020000000,0x000f98cd75c11ed7,1 +np.float64,0xc086236ca0000000,0x000f8081b4d93f89,1 +np.float64,0xc086236cb0000000,0x000f8062b3f4d6c5,1 +np.float64,0xc086236cc0000000,0x000f8043b34e6f8c,1 +np.float64,0xc086238d98000000,0x000f41220d9b0d2c,1 +np.float64,0xc086238da0000000,0x000f4112cc80a01f,1 +np.float64,0xc086238d80000000,0x000f414fd145db5b,1 +np.float64,0xc08624fd00000000,0x000cbfce8ea1e6c4,1 +np.float64,0xc086256080000000,0x000c250747fcd46e,1 +np.float64,0xc08626c480000000,0x000a34f4bd975193,1 +np.float64,0xbf50000000000000,0x3feff800ffeaac00,1 +np.float64,0xbe10000000000000,0x3fefffffff800000,1 +np.float64,0xbcd0000000000000,0x3feffffffffffff8,1 +np.float64,0xc055d589e0000000,0x38100004bf94f63e,1 +np.float64,0xc055d58a00000000,0x380ffff97f292ce8,1 +np.float64,0xbfd962d900000000,0x3fe585a4b00110e1,1 +np.float64,0x3ff4bed280000000,0x400d411e7a58a303,1 +np.float64,0x3fff0b3620000000,0x401bd7737ffffcf3,1 +np.float64,0x3ff0000000000000,0x4005bf0a8b145769,1 +np.float64,0x3eb0000000000000,0x3ff0000100000800,1 +np.float64,0x3d70000000000000,0x3ff0000000001000,1 +np.float64,0x40862e42e0000000,0x7fefff841808287f,1 +np.float64,0x40862e42fefa39ef,0x7fefffffffffff2a,1 +np.float64,0x40862e0000000000,0x7feef85a11e73f2d,1 +np.float64,0x4000000000000000,0x401d8e64b8d4ddae,1 +np.float64,0x4009242920000000,0x40372a52c383a488,1 +np.float64,0x4049000000000000,0x44719103e4080b45,1 +np.float64,0x4008000000000000,0x403415e5bf6fb106,1 +np.float64,0x3f50000000000000,0x3ff00400800aab55,1 +np.float64,0x3e10000000000000,0x3ff0000000400000,1 +np.float64,0x3cd0000000000000,0x3ff0000000000004,1 +np.float64,0x40562e40a0000000,0x47effed088821c3f,1 +np.float64,0x40562e42e0000000,0x47effff082e6c7ff,1 +np.float64,0x40562e4300000000,0x47f00000417184b8,1 +np.float64,0x3fe8000000000000,0x4000ef9db467dcf8,1 +np.float64,0x402b12e8d4f33589,0x412718f68c71a6fe,1 +np.float64,0x402b12e8d4f3358a,0x412718f68c71a70a,1 +np.float64,0x402b12e8c0000000,0x412718f59a7f472e,1 +np.float64,0x402b12e8e0000000,0x412718f70c0eac62,1 +##use 1th entry +np.float64,0x40631659AE147CB4,0x4db3a95025a4890f,1 +np.float64,0xC061B87D2E85A4E2,0x332640c8e2de2c51,1 +np.float64,0x405A4A50BE243AF4,0x496a45e4b7f0339a,1 +np.float64,0xC0839898B98EC5C6,0x0764027828830df4,1 +#use 2th entry +np.float64,0xC072428C44B6537C,0x2596ade838b96f3e,1 +np.float64,0xC053057C5E1AE9BF,0x3912c8fad18fdadf,1 +np.float64,0x407E89C78328BAA3,0x6bfe35d5b9a1a194,1 +np.float64,0x4083501B6DD87112,0x77a855503a38924e,1 +#use 3th entry +np.float64,0x40832C6195F24540,0x7741e73c80e5eb2f,1 +np.float64,0xC083D4CD557C2EC9,0x06b61727c2d2508e,1 +np.float64,0x400C48F5F67C99BD,0x404128820f02b92e,1 +np.float64,0x4056E36D9B2DF26A,0x4830f52ff34a8242,1 +#use 4th entry +np.float64,0x4080FF700D8CBD06,0x70fa70df9bc30f20,1 +np.float64,0x406C276D39E53328,0x543eb8e20a8f4741,1 +np.float64,0xC070D6159BBD8716,0x27a4a0548c904a75,1 +np.float64,0xC052EBCF8ED61F83,0x391c0e92368d15e4,1 +#use 5th entry +np.float64,0xC061F892A8AC5FBE,0x32f807a89efd3869,1 +np.float64,0x4021D885D2DBA085,0x40bd4dc86d3e3270,1 +np.float64,0x40767AEEEE7D4FCF,0x605e22851ee2afb7,1 +np.float64,0xC0757C5D75D08C80,0x20f0751599b992a2,1 +#use 6th entry +np.float64,0x405ACF7A284C4CE3,0x499a4e0b7a27027c,1 +np.float64,0xC085A6C9E80D7AF5,0x0175914009d62ec2,1 +np.float64,0xC07E4C02F86F1DAE,0x1439269b29a9231e,1 +np.float64,0x4080D80F9691CC87,0x7088a6cdafb041de,1 +#use 7th entry +np.float64,0x407FDFD84FBA0AC1,0x6deb1ae6f9bc4767,1 +np.float64,0x40630C06A1A2213D,0x4dac7a9d51a838b7,1 +np.float64,0x40685FDB30BB8B4F,0x5183f5cc2cac9e79,1 +np.float64,0x408045A2208F77F4,0x6ee299e08e2aa2f0,1 +#use 8th entry +np.float64,0xC08104E391F5078B,0x0ed397b7cbfbd230,1 +np.float64,0xC031501CAEFAE395,0x3e6040fd1ea35085,1 +np.float64,0xC079229124F6247C,0x1babf4f923306b1e,1 +np.float64,0x407FB65F44600435,0x6db03beaf2512b8a,1 +#use 9th entry +np.float64,0xC07EDEE8E8E8A5AC,0x136536cec9cbef48,1 +np.float64,0x4072BB4086099A14,0x5af4d3c3008b56cc,1 +np.float64,0x4050442A2EC42CB4,0x45cd393bd8fad357,1 +np.float64,0xC06AC28FB3D419B4,0x2ca1b9d3437df85f,1 +#use 10th entry +np.float64,0x40567FC6F0A68076,0x480c977fd5f3122e,1 +np.float64,0x40620A2F7EDA59BB,0x4cf278e96f4ce4d7,1 +np.float64,0xC085044707CD557C,0x034aad6c968a045a,1 +np.float64,0xC07374EA5AC516AA,0x23dd6afdc03e83d5,1 +#use 11th entry +np.float64,0x4073CC95332619C1,0x5c804b1498bbaa54,1 +np.float64,0xC0799FEBBE257F31,0x1af6a954c43b87d2,1 +np.float64,0x408159F19EA424F6,0x7200858efcbfc84d,1 +np.float64,0x404A81F6F24C0792,0x44b664a07ce5bbfa,1 +#use 12th entry +np.float64,0x40295FF1EFB9A741,0x4113c0e74c52d7b0,1 +np.float64,0x4073975F4CC411DA,0x5c32be40b4fec2c1,1 +np.float64,0x406E9DE52E82A77E,0x56049c9a3f1ae089,1 +np.float64,0x40748C2F52560ED9,0x5d93bc14fd4cd23b,1 +#use 13th entry +np.float64,0x4062A553CDC4D04C,0x4d6266bfde301318,1 +np.float64,0xC079EC1D63598AB7,0x1a88cb184dab224c,1 +np.float64,0xC0725C1CB3167427,0x25725b46f8a081f6,1 +np.float64,0x407888771D9B45F9,0x6353b1ec6bd7ce80,1 +#use 14th entry +np.float64,0xC082CBA03AA89807,0x09b383723831ce56,1 +np.float64,0xC083A8961BB67DD7,0x0735b118d5275552,1 +np.float64,0xC076BC6ECA12E7E3,0x1f2222679eaef615,1 +np.float64,0xC072752503AA1A5B,0x254eb832242c77e1,1 +#use 15th entry +np.float64,0xC058800792125DEC,0x371882372a0b48d4,1 +np.float64,0x4082909FD863E81C,0x7580d5f386920142,1 +np.float64,0xC071616F8FB534F9,0x26dbe20ef64a412b,1 +np.float64,0x406D1AB571CAA747,0x54ee0d55cb38ac20,1 +#use 16th entry +np.float64,0x406956428B7DAD09,0x52358682c271237f,1 +np.float64,0xC07EFC2D9D17B621,0x133b3e77c27a4d45,1 +np.float64,0xC08469BAC5BA3CCA,0x050863e5f42cc52f,1 +np.float64,0x407189D9626386A5,0x593cb1c0b3b5c1d3,1 +#use 17th entry +np.float64,0x4077E652E3DEB8C6,0x6269a10dcbd3c752,1 +np.float64,0x407674C97DB06878,0x605485dcc2426ec2,1 +np.float64,0xC07CE9969CF4268D,0x16386cf8996669f2,1 +np.float64,0x40780EE32D5847C4,0x62a436bd1abe108d,1 +#use 18th entry +np.float64,0x4076C3AA5E1E8DA1,0x60c62f56a5e72e24,1 +np.float64,0xC0730AFC7239B9BE,0x24758ead095cec1e,1 +np.float64,0xC085CC2B9C420DDB,0x0109cdaa2e5694c1,1 +np.float64,0x406D0765CB6D7AA4,0x54e06f8dd91bd945,1 +#use 19th entry +np.float64,0xC082D011F3B495E7,0x09a6647661d279c2,1 +np.float64,0xC072826AF8F6AFBC,0x253acd3cd224507e,1 +np.float64,0x404EB9C4810CEA09,0x457933dbf07e8133,1 +np.float64,0x408284FBC97C58CE,0x755f6eb234aa4b98,1 +#use 20th entry +np.float64,0x40856008CF6EDC63,0x7d9c0b3c03f4f73c,1 +np.float64,0xC077CB2E9F013B17,0x1d9b3d3a166a55db,1 +np.float64,0xC0479CA3C20AD057,0x3bad40e081555b99,1 +np.float64,0x40844CD31107332A,0x7a821d70aea478e2,1 +#use 21th entry +np.float64,0xC07C8FCC0BFCC844,0x16ba1cc8c539d19b,1 +np.float64,0xC085C4E9A3ABA488,0x011ff675ba1a2217,1 +np.float64,0x4074D538B32966E5,0x5dfd9d78043c6ad9,1 +np.float64,0xC0630CA16902AD46,0x3231a446074cede6,1 +#use 22th entry +np.float64,0xC06C826733D7D0B7,0x2b5f1078314d41e1,1 +np.float64,0xC0520DF55B2B907F,0x396c13a6ce8e833e,1 +np.float64,0xC080712072B0F437,0x107eae02d11d98ea,1 +np.float64,0x40528A6150E19EFB,0x469fdabda02228c5,1 +#use 23th entry +np.float64,0xC07B1D74B6586451,0x18d1253883ae3b48,1 +np.float64,0x4045AFD7867DAEC0,0x43d7d634fc4c5d98,1 +np.float64,0xC07A08B91F9ED3E2,0x1a60973e6397fc37,1 +np.float64,0x407B3ECF0AE21C8C,0x673e03e9d98d7235,1 +#use 24th entry +np.float64,0xC078AEB6F30CEABF,0x1c530b93ab54a1b3,1 +np.float64,0x4084495006A41672,0x7a775b6dc7e63064,1 +np.float64,0x40830B1C0EBF95DD,0x76e1e6eed77cfb89,1 +np.float64,0x407D93E8F33D8470,0x6a9adbc9e1e4f1e5,1 +#use 25th entry +np.float64,0x4066B11A09EFD9E8,0x504dd528065c28a7,1 +np.float64,0x408545823723AEEB,0x7d504a9b1844f594,1 +np.float64,0xC068C711F2CA3362,0x2e104f3496ea118e,1 +np.float64,0x407F317FCC3CA873,0x6cf0732c9948ebf4,1 +#use 26th entry +np.float64,0x407AFB3EBA2ED50F,0x66dc28a129c868d5,1 +np.float64,0xC075377037708ADE,0x21531a329f3d793e,1 +np.float64,0xC07C30066A1F3246,0x174448baa16ded2b,1 +np.float64,0xC06689A75DE2ABD3,0x2fad70662fae230b,1 +#use 27th entry +np.float64,0x4081514E9FCCF1E0,0x71e673b9efd15f44,1 +np.float64,0xC0762C710AF68460,0x1ff1ed7d8947fe43,1 +np.float64,0xC0468102FF70D9C4,0x3be0c3a8ff3419a3,1 +np.float64,0xC07EA4CEEF02A83E,0x13b908f085102c61,1 +#use 28th entry +np.float64,0xC06290B04AE823C4,0x328a83da3c2e3351,1 +np.float64,0xC0770EB1D1C395FB,0x1eab281c1f1db5fe,1 +np.float64,0xC06F5D4D838A5BAE,0x29500ea32fb474ea,1 +np.float64,0x40723B3133B54C5D,0x5a3c82c7c3a2b848,1 +#use 29th entry +np.float64,0x4085E6454CE3B4AA,0x7f20319b9638d06a,1 +np.float64,0x408389F2A0585D4B,0x7850667c58aab3d0,1 +np.float64,0xC0382798F9C8AE69,0x3dc1c79fe8739d6d,1 +np.float64,0xC08299D827608418,0x0a4335f76cdbaeb5,1 +#use 30th entry +np.float64,0xC06F3DED43301BF1,0x2965670ae46750a8,1 +np.float64,0xC070CAF6BDD577D9,0x27b4aa4ffdd29981,1 +np.float64,0x4078529AD4B2D9F2,0x6305c12755d5e0a6,1 +np.float64,0xC055B14E75A31B96,0x381c2eda6d111e5d,1 +#use 31th entry +np.float64,0x407B13EE414FA931,0x6700772c7544564d,1 +np.float64,0x407EAFDE9DE3EC54,0x6c346a0e49724a3c,1 +np.float64,0xC08362F398B9530D,0x07ffeddbadf980cb,1 +np.float64,0x407E865CDD9EEB86,0x6bf866cac5e0d126,1 +#use 32th entry +np.float64,0x407FB62DBC794C86,0x6db009f708ac62cb,1 +np.float64,0xC063D0BAA68CDDDE,0x31a3b2a51ce50430,1 +np.float64,0xC05E7706A2231394,0x34f24bead6fab5c9,1 +np.float64,0x4083E3A06FDE444E,0x79527b7a386d1937,1 diff --git a/numpy/core/tests/test_umath.py b/numpy/core/tests/test_umath.py index 233a0b1d6..60c9fe437 100644 --- a/numpy/core/tests/test_umath.py +++ b/numpy/core/tests/test_umath.py @@ -640,6 +640,16 @@ class TestExp: yf = np.array(y, dtype=dt)*log2_ assert_almost_equal(np.exp(yf), xf) + def test_exp_strides(self): + np.random.seed(42) + strides = np.array([-4,-3,-2,-1,1,2,3,4]) + sizes = np.arange(2,100) + for ii in sizes: + x_f64 = np.float64(np.random.uniform(low=0.01, high=709.1,size=ii)) + y_true = np.exp(x_f64) + for jj in strides: + assert_array_almost_equal_nulp(np.exp(x_f64[::jj]), y_true[::jj], nulp=2) + class TestSpecialFloats: def test_exp_values(self): x = [np.nan, np.nan, np.inf, 0.] @@ -652,6 +662,8 @@ class TestSpecialFloats: with np.errstate(over='raise'): assert_raises(FloatingPointError, np.exp, np.float32(100.)) assert_raises(FloatingPointError, np.exp, np.float32(1E19)) + assert_raises(FloatingPointError, np.exp, np.float64(800.)) + assert_raises(FloatingPointError, np.exp, np.float64(1E19)) def test_log_values(self): with np.errstate(all='ignore'): diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py index fd7214396..e3c2eb025 100644 --- a/numpy/core/tests/test_umath_accuracy.py +++ b/numpy/core/tests/test_umath_accuracy.py @@ -3,7 +3,7 @@ import platform from os import path import sys import pytest -from ctypes import c_float, c_int, cast, pointer, POINTER +from ctypes import c_longlong, c_double, c_float, c_int, cast, pointer, POINTER from numpy.testing import assert_array_max_ulp from numpy.core._multiarray_umath import __cpu_features__ @@ -16,10 +16,15 @@ platform_skip = pytest.mark.skipif(not runtest, # convert string to hex function taken from: # https://stackoverflow.com/questions/1592158/convert-hex-to-float # -def convert(s): +def convert(s, datatype="np.float32"): i = int(s, 16) # convert from hex to a Python int - cp = pointer(c_int(i)) # make this into a c integer - fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer + if (datatype == "np.float64"): + cp = pointer(c_longlong(i)) # make this into a c long long integer + fp = cast(cp, POINTER(c_double)) # cast the int pointer to a double pointer + else: + cp = pointer(c_int(i)) # make this into a c integer + fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer + return fp.contents.value # dereference the pointer, get the float str_to_float = np.vectorize(convert) @@ -45,8 +50,8 @@ class TestAccuracy: npfunc = getattr(np, filename.split('-')[3]) for datatype in np.unique(data['type']): data_subset = data[data['type'] == datatype] - inval = np.array(str_to_float(data_subset['input'].astype(str)), dtype=eval(datatype)) - outval = np.array(str_to_float(data_subset['output'].astype(str)), dtype=eval(datatype)) + inval = np.array(str_to_float(data_subset['input'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) + outval = np.array(str_to_float(data_subset['output'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) perm = np.random.permutation(len(inval)) inval = inval[perm] outval = outval[perm] |