diff options
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/code_generators/generate_umath.py | 1 | ||||
-rw-r--r-- | numpy/core/include/numpy/npy_math.h | 85 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.c.src | 20 | ||||
-rw-r--r-- | numpy/core/src/umath/loops.h.src | 6 | ||||
-rw-r--r-- | numpy/core/src/umath/simd.inc.src | 181 | ||||
-rw-r--r-- | numpy/core/tests/data/umath-validation-set-exp | 108 | ||||
-rw-r--r-- | numpy/core/tests/test_umath_accuracy.py | 17 |
7 files changed, 412 insertions, 6 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py index c14711d16..52ae3cdd7 100644 --- a/numpy/core/code_generators/generate_umath.py +++ b/numpy/core/code_generators/generate_umath.py @@ -702,6 +702,7 @@ defdict = { None, TD('e', f='exp', astype={'e':'f'}), TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]), + TD('d', simd=[('avx512f', 'd')]), TD('fdg' + cmplx, f='exp'), TD(P, f='exp'), ), diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 69e690f28..7b29d5205 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -130,6 +130,91 @@ NPY_INLINE static float __npy_nzerof(void) #define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f /* + * Constants used in vector implementation of float64 exp(x) + */ +#define NPY_RINT_CVT_MAGIC 0x1.8p52 +#define NPY_INV_LN2_MUL_32 0x1.71547652b82fep+5 +#define NPY_TANG_NEG_L1 -0x1.62e42fefp-6 +#define NPY_TANG_NEG_L2 -0x1.473de6af278edp-39 +#define NPY_TANG_A1 0x1p-1 +#define NPY_TANG_A2 0x1.5555555548f7cp-3 +#define NPY_TANG_A3 0x1.5555555545d4ep-5 +#define NPY_TANG_A4 0x1.11115b7aa905ep-7 +#define NPY_TANG_A5 0x1.6c1728d739765p-10 + +/* Lookup table for 2^(j/32) */ +static npy_uint64 EXP_Table_top[32] = { + 0x3FF0000000000000, + 0x3FF059B0D3158540, + 0x3FF0B5586CF98900, + 0x3FF11301D0125B40, + 0x3FF172B83C7D5140, + 0x3FF1D4873168B980, + 0x3FF2387A6E756200, + 0x3FF29E9DF51FDEC0, + 0x3FF306FE0A31B700, + 0x3FF371A7373AA9C0, + 0x3FF3DEA64C123400, + 0x3FF44E0860618900, + 0x3FF4BFDAD5362A00, + 0x3FF5342B569D4F80, + 0x3FF5AB07DD485400, + 0x3FF6247EB03A5580, + 0x3FF6A09E667F3BC0, + 0x3FF71F75E8EC5F40, + 0x3FF7A11473EB0180, + 0x3FF82589994CCE00, + 0x3FF8ACE5422AA0C0, + 0x3FF93737B0CDC5C0, + 0x3FF9C49182A3F080, + 0x3FFA5503B23E2540, + 0x3FFAE89F995AD380, + 0x3FFB7F76F2FB5E40, + 0x3FFC199BDD855280, + 0x3FFCB720DCEF9040, + 0x3FFD5818DCFBA480, + 0x3FFDFC97337B9B40, + 0x3FFEA4AFA2A490C0, + 0x3FFF50765B6E4540, +}; + +static npy_uint64 EXP_Table_tail[32] = { + 0x0000000000000000, + 0x3D0A1D73E2A475B4, + 0x3CEEC5317256E308, + 0x3CF0A4EBBF1AED93, + 0x3D0D6E6FBE462876, + 0x3D053C02DC0144C8, + 0x3D0C3360FD6D8E0B, + 0x3D009612E8AFAD12, + 0x3CF52DE8D5A46306, + 0x3CE54E28AA05E8A9, + 0x3D011ADA0911F09F, + 0x3D068189B7A04EF8, + 0x3D038EA1CBD7F621, + 0x3CBDF0A83C49D86A, + 0x3D04AC64980A8C8F, + 0x3CD2C7C3E81BF4B7, + 0x3CE921165F626CDD, + 0x3D09EE91B8797785, + 0x3CDB5F54408FDB37, + 0x3CF28ACF88AFAB35, + 0x3CFB5BA7C55A192D, + 0x3D027A280E1F92A0, + 0x3CF01C7C46B071F3, + 0x3CFC8B424491CAF8, + 0x3D06AF439A68BB99, + 0x3CDBAA9EC206AD4F, + 0x3CFC2220CB12A092, + 0x3D048A81E5E8F4A5, + 0x3CDC976816BAD9B8, + 0x3CFEB968CAC39ED3, + 0x3CF9858F73A18F5E, + 0x3C99D3E12DD8A18B, +}; + + +/* * Constants used in vector implementation of log(x) */ #define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src index 9b43824cb..eea82309c 100644 --- a/numpy/core/src/umath/loops.c.src +++ b/numpy/core/src/umath/loops.c.src @@ -1558,6 +1558,15 @@ FLOAT_@func@(char **args, npy_intp const *dimensions, npy_intp const *steps, voi /**end repeat**/ +NPY_NO_EXPORT NPY_GCC_OPT_3 void +DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) +{ + UNARY_LOOP { + const npy_double in1 = *(npy_double *)ip1; + *(npy_double *)op1 = npy_exp(in1); + } +} + /**begin repeat * #isa = avx512f, fma# * #ISA = AVX512F, FMA# @@ -1688,6 +1697,17 @@ FLOAT_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *step /**end repeat1**/ /**end repeat**/ +NPY_NO_EXPORT NPY_GCC_OPT_3 void +DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data)) +{ + if (!run_unary_avx512f_exp_DOUBLE(args, dimensions, steps)) { + UNARY_LOOP { + const npy_double in1 = *(npy_double *)ip1; + *(npy_double *)op1 = npy_exp(in1); + } + } +} + /**begin repeat * Float types diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src index e9d0b4c62..50a7ccfee 100644 --- a/numpy/core/src/umath/loops.h.src +++ b/numpy/core/src/umath/loops.h.src @@ -196,6 +196,12 @@ NPY_NO_EXPORT void /**end repeat1**/ /**end repeat**/ +NPY_NO_EXPORT void +DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); + +NPY_NO_EXPORT void +DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); + /**begin repeat * #func = sin, cos, exp, log# */ diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src index 4265476b5..68a1b8aa7 100644 --- a/numpy/core/src/umath/simd.inc.src +++ b/numpy/core/src/umath/simd.inc.src @@ -387,6 +387,23 @@ run_unary_@isa@_sincos_FLOAT(char **args, npy_intp const *dimensions, npy_intp c /**end repeat**/ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS +static NPY_INLINE void +AVX512F_exp_DOUBLE(npy_double *, npy_double *, const npy_intp n, const npy_intp stride); +#endif +static NPY_INLINE int +run_unary_avx512f_exp_DOUBLE(char **args, npy_intp const *dimensions, npy_intp const *steps) +{ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS + if (IS_OUTPUT_BLOCKABLE_UNARY(sizeof(npy_double), 64)) { + AVX512F_exp_DOUBLE((npy_double*)args[1], (npy_double*)args[0], dimensions[0], steps[0]); + return 1; + } + else + return 0; +#endif + return 0; +} /**begin repeat * Float types @@ -1695,6 +1712,22 @@ avx512_scalef_ps(__m512 poly, __m512 quadrant) { return _mm512_scalef_ps(poly, quadrant); } + +static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d +avx512_permute_x4var_pd(__m512d t0, + __m512d t1, + __m512d t2, + __m512d t3, + __m512i index) +{ + + __mmask8 lut_mask = _mm512_cmp_epi64_mask(index, _mm512_set1_epi64(15), + _MM_CMPINT_GT); + __m512d res1 = _mm512_permutex2var_pd(t0, index, t1); + __m512d res2 = _mm512_permutex2var_pd(t2, index, t3); + return _mm512_mask_blend_pd(lut_mask, res1, res2); +} + /**begin repeat * #vsub = ps, pd# * #type= npy_float, npy_double# @@ -2654,6 +2687,154 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void #endif /**end repeat**/ +/* + * Vectorized implementation of exp double using AVX512 + * Reference: Tang, P.T.P., "Table-driven implementation of the + * exponential function in IEEE floating-point + * arithmetic," ACM Transactions on Mathematical + * Software, vol. 15, pp. 144-157, 1989. + * 1) if x > mTH_max or x is INF; return INF (overflow) + * 2) if x < mTH_min; return 0.0f (underflow) + * 3) if abs(x) < mTH_nearzero; return 1.0f + x + * 4) Range reduction: + * x = (32m + j)ln2 / 32 + r; r in [-ln2/64, ln2/64] + * 5) exp(r) - 1 is approximated by a polynomial function p(r) + * exp(x) = 2^m(2^(j/32) + 2^(j/32)p(r)); + */ +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS +static NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F void +AVX512F_exp_DOUBLE(npy_double * op, + npy_double * ip, + const npy_intp array_size, + const npy_intp steps) +{ + npy_intp num_remaining_elements = array_size; + const npy_intp stride = steps / sizeof(npy_double); + const npy_int num_lanes = 64 / sizeof(npy_double); + npy_int indexarr[8]; + for (npy_int ii = 0; ii < 8; ii++) { + indexarr[ii] = ii*stride; + } + + __m512d InvLn2N = _mm512_set1_pd(NPY_INV_LN2_MUL_32); + __m512d mShift = _mm512_set1_pd(NPY_RINT_CVT_MAGIC); + __m512d mNegL1 = _mm512_set1_pd(NPY_TANG_NEG_L1); + __m512d mNegL2 = _mm512_set1_pd(NPY_TANG_NEG_L2); + __m512i mMod = _mm512_set1_epi64(0x1f); + __m512d mA1 = _mm512_set1_pd(NPY_TANG_A1); + __m512d mA2 = _mm512_set1_pd(NPY_TANG_A2); + __m512d mA3 = _mm512_set1_pd(NPY_TANG_A3); + __m512d mA4 = _mm512_set1_pd(NPY_TANG_A4); + __m512d mA5 = _mm512_set1_pd(NPY_TANG_A5); + __m512d mTH_nearzero = _mm512_set1_pd(0x1p-54); + __m512d mTH_max = _mm512_set1_pd(0x1.62e42fefa39efp+9); + __m512d mTH_min = _mm512_set1_pd(-0x1.74910d52d3053p+9); + __m512d mTH_inf = _mm512_set1_pd(NPY_INFINITY); + __m512d zeros_d = _mm512_set1_pd(0.0f); + __m512d ones_d = _mm512_set1_pd(1.0f); + __m256i vindex = _mm256_loadu_si256((__m256i*)&indexarr[0]); + + __m512d mTable_top_0 = _mm512_loadu_pd(&(EXP_Table_top[0])); + __m512d mTable_top_1 = _mm512_loadu_pd(&(EXP_Table_top[8])); + __m512d mTable_top_2 = _mm512_loadu_pd(&(EXP_Table_top[8*2])); + __m512d mTable_top_3 = _mm512_loadu_pd(&(EXP_Table_top[8*3])); + __m512d mTable_tail_0 = _mm512_loadu_pd(&(EXP_Table_tail[0])); + __m512d mTable_tail_1 = _mm512_loadu_pd(&(EXP_Table_tail[8])); + __m512d mTable_tail_2 = _mm512_loadu_pd(&(EXP_Table_tail[8*2])); + __m512d mTable_tail_3 = _mm512_loadu_pd(&(EXP_Table_tail[8*3])); + + __mmask8 overflow_mask = avx512_get_partial_load_mask_pd(0, num_lanes); + __mmask8 load_mask = avx512_get_full_load_mask_pd(); + __mmask8 xmin_mask, xmax_mask, inf_mask, nan_mask, nearzero_mask; + + while (num_remaining_elements > 0) { + if (num_remaining_elements < num_lanes) { + load_mask = avx512_get_partial_load_mask_pd(num_remaining_elements, + num_lanes); + } + + __m512d x; + if (1 == stride) { + x = avx512_masked_load_pd(load_mask, ip); + } + else { + x = avx512_masked_gather_pd(zeros_d, ip, vindex, load_mask); + } + + nan_mask = _mm512_cmp_pd_mask(x, x, _CMP_NEQ_UQ); + x = avx512_set_masked_lanes_pd(x, zeros_d, nan_mask); + xmax_mask = _mm512_cmp_pd_mask(x, mTH_max, _CMP_GT_OQ); + xmin_mask = _mm512_cmp_pd_mask(x, mTH_min, _CMP_LT_OQ); + inf_mask = _mm512_cmp_pd_mask(x, mTH_inf, _CMP_EQ_OQ); + __m512i x_abs = _mm512_and_epi64(_mm512_castpd_si512(x), + _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF)); + nearzero_mask = _mm512_cmp_pd_mask(_mm512_castsi512_pd(x_abs), + mTH_nearzero, _CMP_LT_OQ); + nearzero_mask = _mm512_kxor(nearzero_mask, nan_mask); + overflow_mask = _mm512_kor(overflow_mask, + _mm512_kxor(xmax_mask, inf_mask)); + x = avx512_set_masked_lanes_pd(x, zeros_d, + _mm512_kor(_mm512_kor(nan_mask, xmin_mask), + _mm512_kor(xmax_mask, nearzero_mask))); + + /* z = x * 32/ln2 */ + __m512d z = _mm512_mul_pd(x, InvLn2N); + + /* round to nearest */ + __m512d kd = _mm512_add_pd(z, mShift); + __m512i ki = _mm512_castpd_si512(kd); + kd = _mm512_sub_pd(kd, mShift); + + /* r = (x + kd*mNegL1) + kd*mNegL2 */ + __m512d r1 = _mm512_fmadd_pd(kd, mNegL1, x); + __m512d r2 = _mm512_mul_pd(kd, mNegL2); + __m512d r = _mm512_add_pd(r1,r2); + + /* Polynomial approximation for exp(r) - 1 */ + __m512d q = _mm512_fmadd_pd(mA5, r, mA4); + q = _mm512_fmadd_pd(q, r, mA3); + q = _mm512_fmadd_pd(q, r, mA2); + q = _mm512_fmadd_pd(q, r, mA1); + q = _mm512_mul_pd(q, r); + __m512d p = _mm512_fmadd_pd(r, q, r2);; + p = _mm512_add_pd(r1, p); + + /* Get 2^(j/32) from lookup table */ + __m512i j = _mm512_and_epi64(ki, mMod); + __m512d top = avx512_permute_x4var_pd(mTable_top_0, mTable_top_1, + mTable_top_2, mTable_top_3, j); + __m512d tail = avx512_permute_x4var_pd(mTable_tail_0, mTable_tail_1, + mTable_tail_2, mTable_tail_3, j); + + /* + * s = top + tail; + * exp(x) = 2^m * (top + (tail + s * p)); + */ + __m512d s = _mm512_add_pd(top, tail); + __m512d res = _mm512_fmadd_pd(s, p, tail); + res = _mm512_add_pd(res, top); + res= _mm512_scalef_pd(res, _mm512_div_pd(kd, _mm512_set1_pd(32))); + + /* return special cases */ + res = avx512_set_masked_lanes_pd(res, _mm512_add_pd(x, ones_d), + nearzero_mask); + res = avx512_set_masked_lanes_pd(res, _mm512_set1_pd(NPY_NAN), + nan_mask); + res = avx512_set_masked_lanes_pd(res, mTH_inf, xmax_mask); + res = avx512_set_masked_lanes_pd(res, zeros_d, xmin_mask); + + _mm512_mask_storeu_pd(op, load_mask, res); + + ip += num_lanes * stride; + op += num_lanes; + num_remaining_elements -= num_lanes; + } + if (overflow_mask) { + npy_set_floatstatus_overflow(); + } +} +#endif + /**begin repeat * #TYPE = CFLOAT, CDOUBLE# * #type = npy_float, npy_double# diff --git a/numpy/core/tests/data/umath-validation-set-exp b/numpy/core/tests/data/umath-validation-set-exp index 1b2cc9ce4..58bba300e 100644 --- a/numpy/core/tests/data/umath-validation-set-exp +++ b/numpy/core/tests/data/umath-validation-set-exp @@ -133,3 +133,111 @@ np.float32,0xc29b43d5,0x077ffffc,3 np.float32,0xc1e61ff7,0x2ab504f5,3 np.float32,0xc2867878,0x0effff15,3 np.float32,0xc2a2324a,0x04fffff4,3 +#float64 +#near zero +np.float64,0x8000000000000000, 0x3ff0000000000000,1 +np.float64,0x8010000000000000, 0x3ff0000000000000,1 +np.float64,0x8000000000000001, 0x3ff0000000000000,1 +np.float64,0x8360000000000000, 0x3ff0000000000000,1 +np.float64,0x9a70000000000000, 0x3ff0000000000000,1 +np.float64,0xb9b0000000000000, 0x3ff0000000000000,1 +np.float64,0xb810000000000000, 0x3ff0000000000000,1 +np.float64,0xbc30000000000000, 0x3ff0000000000000,1 +np.float64,0xb6a0000000000000, 0x3ff0000000000000,1 +np.float64,0x0000000000000000, 0x3ff0000000000000,1 +np.float64,0x0010000000000000, 0x3ff0000000000000,1 +np.float64,0x0000000000000001, 0x3ff0000000000000,1 +np.float64,0x0360000000000000, 0x3ff0000000000000,1 +np.float64,0x1a70000000000000, 0x3ff0000000000000,1 +np.float64,0x3c30000000000000, 0x3ff0000000000000,1 +np.float64,0x36a0000000000000, 0x3ff0000000000000,1 +np.float64,0x39b0000000000000, 0x3ff0000000000000,1 +np.float64,0x3810000000000000, 0x3ff0000000000000,1 +#underflow +np.float64,0xc0c6276800000000, 0x0000000000000000,1 +np.float64,0xc0c62d918ce2421d, 0x0000000000000000,1 +np.float64,0xc0c62d918ce2421e, 0x0000000000000000,1 +np.float64,0xc0c62d91a0000000, 0x0000000000000000,1 +np.float64,0xc0c62d9180000000, 0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e06, 0x0000000000000000,1 +np.float64,0xc0c62dea45ee3e07, 0x0000000000000000,1 +np.float64,0xc0c62dea40000000, 0x0000000000000000,1 +np.float64,0xc0c62dea60000000, 0x0000000000000000,1 +np.float64,0xc087438520000000, 0x0000000000000001,1 +np.float64,0xc08743853f2f4461, 0x0000000000000001,1 +np.float64,0xc08743853f2f4460, 0x0000000000000001,1 +np.float64,0xc087438540000000, 0x0000000000000001,1 +np.float64,0xc0875f1120000000, 0x0000000000000000,1 +np.float64,0xc0875f113c30b1c8, 0x0000000000000000,1 +np.float64,0xc0875f1140000000, 0x0000000000000000,1 +np.float64,0xc093480000000000, 0x0000000000000000,1 +np.float64,0xffefffffffffffff, 0x0000000000000000,1 +np.float64,0xc7efffffe0000000, 0x0000000000000000,1 +#between −745.13321910 and 709.78271289 +np.float64,0xbff760cd14774bd9, 0x3fcdb14ced00ceb6,1 +np.float64,0xbff760cd20000000, 0x3fcdb14cd7993879,1 +np.float64,0xbff760cd00000000, 0x3fcdb14d12fbd264,1 +np.float64,0xc07f1cf360000000, 0x130c1b369af14fda,1 +np.float64,0xbeb0000000000000, 0x3feffffe00001000,1 +np.float64,0xbd70000000000000, 0x3fefffffffffe000,1 +np.float64,0xc084fd46e5c84952, 0x0360000000000139,1 +np.float64,0xc084fd46e5c84953, 0x035ffffffffffe71,1 +np.float64,0xc084fd46e0000000, 0x0360000b9096d32c,1 +np.float64,0xc084fd4700000000, 0x035fff9721d12104,1 +np.float64,0xc086232bc0000000, 0x0010003af5e64635,1 +np.float64,0xc086232bdd7abcd2, 0x001000000000007c,1 +np.float64,0xc086232bdd7abcd3, 0x000ffffffffffe7c,1 +np.float64,0xc086232be0000000, 0x000ffffaf57a6fc9,1 +np.float64,0xc086233920000000, 0x000fe590e3b45eb0,1 +np.float64,0xc086233938000000, 0x000fe56133493c57,1 +np.float64,0xc086233940000000, 0x000fe5514deffbbc,1 +np.float64,0xc086234c98000000, 0x000fbf1024c32ccb,1 +np.float64,0xc086234ca0000000, 0x000fbf0065bae78d,1 +np.float64,0xc086234c80000000, 0x000fbf3f623a7724,1 +np.float64,0xc086234ec0000000, 0x000fbad237c846f9,1 +np.float64,0xc086234ec8000000, 0x000fbac27cfdec97,1 +np.float64,0xc086234ee0000000, 0x000fba934cfd3dc2,1 +np.float64,0xc086234ef0000000, 0x000fba73d7f618d9,1 +np.float64,0xc086234f00000000, 0x000fba54632dddbf,1 +np.float64,0xc0862356e0000000, 0x000faae0945b761a,1 +np.float64,0xc0862356f0000000, 0x000faac13eb9a310,1 +np.float64,0xc086235700000000, 0x000faaa1e9567b0a,1 +np.float64,0xc086236020000000, 0x000f98cd75c11ed7,1 +np.float64,0xc086236ca0000000, 0x000f8081b4d93f89,1 +np.float64,0xc086236cb0000000, 0x000f8062b3f4d6c5,1 +np.float64,0xc086236cc0000000, 0x000f8043b34e6f8c,1 +np.float64,0xc086238d98000000, 0x000f41220d9b0d2c,1 +np.float64,0xc086238da0000000, 0x000f4112cc80a01f,1 +np.float64,0xc086238d80000000, 0x000f414fd145db5b,1 +np.float64,0xc08624fd00000000, 0x000cbfce8ea1e6c4,1 +np.float64,0xc086256080000000, 0x000c250747fcd46e,1 +np.float64,0xc08626c480000000, 0x000a34f4bd975193,1 +np.float64,0xbf50000000000000, 0x3feff800ffeaac00,1 +np.float64,0xbe10000000000000, 0x3fefffffff800000,1 +np.float64,0xbcd0000000000000, 0x3feffffffffffff8,1 +np.float64,0xc055d589e0000000, 0x38100004bf94f63e,1 +np.float64,0xc055d58a00000000, 0x380ffff97f292ce8,1 +np.float64,0xbfd962d900000000, 0x3fe585a4b00110e1,1 +np.float64,0x3ff4bed280000000, 0x400d411e7a58a303,1 +np.float64,0x3fff0b3620000000, 0x401bd7737ffffcf3,1 +np.float64,0x3ff0000000000000, 0x4005bf0a8b145769,1 +np.float64,0x3eb0000000000000, 0x3ff0000100000800,1 +np.float64,0x3d70000000000000, 0x3ff0000000001000,1 +np.float64,0x40862e42e0000000, 0x7fefff841808287f,1 +np.float64,0x40862e42fefa39ef, 0x7fefffffffffff2a,1 +np.float64,0x40862e0000000000, 0x7feef85a11e73f2d,1 +np.float64,0x4000000000000000, 0x401d8e64b8d4ddae,1 +np.float64,0x4009242920000000, 0x40372a52c383a488,1 +np.float64,0x4049000000000000, 0x44719103e4080b45,1 +np.float64,0x4008000000000000, 0x403415e5bf6fb106,1 +np.float64,0x3f50000000000000, 0x3ff00400800aab55,1 +np.float64,0x3e10000000000000, 0x3ff0000000400000,1 +np.float64,0x3cd0000000000000, 0x3ff0000000000004,1 +np.float64,0x40562e40a0000000, 0x47effed088821c3f,1 +np.float64,0x40562e42e0000000, 0x47effff082e6c7ff,1 +np.float64,0x40562e4300000000, 0x47f00000417184b8,1 +np.float64,0x3fe8000000000000, 0x4000ef9db467dcf8,1 +np.float64,0x402b12e8d4f33589, 0x412718f68c71a6fe,1 +np.float64,0x402b12e8d4f3358a, 0x412718f68c71a70a,1 +np.float64,0x402b12e8c0000000, 0x412718f59a7f472e,1 +np.float64,0x402b12e8e0000000, 0x412718f70c0eac62,1 diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py index fd7214396..e3c2eb025 100644 --- a/numpy/core/tests/test_umath_accuracy.py +++ b/numpy/core/tests/test_umath_accuracy.py @@ -3,7 +3,7 @@ import platform from os import path import sys import pytest -from ctypes import c_float, c_int, cast, pointer, POINTER +from ctypes import c_longlong, c_double, c_float, c_int, cast, pointer, POINTER from numpy.testing import assert_array_max_ulp from numpy.core._multiarray_umath import __cpu_features__ @@ -16,10 +16,15 @@ platform_skip = pytest.mark.skipif(not runtest, # convert string to hex function taken from: # https://stackoverflow.com/questions/1592158/convert-hex-to-float # -def convert(s): +def convert(s, datatype="np.float32"): i = int(s, 16) # convert from hex to a Python int - cp = pointer(c_int(i)) # make this into a c integer - fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer + if (datatype == "np.float64"): + cp = pointer(c_longlong(i)) # make this into a c long long integer + fp = cast(cp, POINTER(c_double)) # cast the int pointer to a double pointer + else: + cp = pointer(c_int(i)) # make this into a c integer + fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer + return fp.contents.value # dereference the pointer, get the float str_to_float = np.vectorize(convert) @@ -45,8 +50,8 @@ class TestAccuracy: npfunc = getattr(np, filename.split('-')[3]) for datatype in np.unique(data['type']): data_subset = data[data['type'] == datatype] - inval = np.array(str_to_float(data_subset['input'].astype(str)), dtype=eval(datatype)) - outval = np.array(str_to_float(data_subset['output'].astype(str)), dtype=eval(datatype)) + inval = np.array(str_to_float(data_subset['input'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) + outval = np.array(str_to_float(data_subset['output'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype)) perm = np.random.permutation(len(inval)) inval = inval[perm] outval = outval[perm] |