summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/code_generators/generate_umath.py1
-rw-r--r--numpy/core/include/numpy/npy_math.h85
-rw-r--r--numpy/core/src/umath/loops.c.src20
-rw-r--r--numpy/core/src/umath/loops.h.src6
-rw-r--r--numpy/core/src/umath/simd.inc.src181
-rw-r--r--numpy/core/tests/data/umath-validation-set-exp108
-rw-r--r--numpy/core/tests/test_umath_accuracy.py17
7 files changed, 412 insertions, 6 deletions
diff --git a/numpy/core/code_generators/generate_umath.py b/numpy/core/code_generators/generate_umath.py
index c14711d16..52ae3cdd7 100644
--- a/numpy/core/code_generators/generate_umath.py
+++ b/numpy/core/code_generators/generate_umath.py
@@ -702,6 +702,7 @@ defdict = {
None,
TD('e', f='exp', astype={'e':'f'}),
TD('f', simd=[('fma', 'f'), ('avx512f', 'f')]),
+ TD('d', simd=[('avx512f', 'd')]),
TD('fdg' + cmplx, f='exp'),
TD(P, f='exp'),
),
diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h
index 69e690f28..7b29d5205 100644
--- a/numpy/core/include/numpy/npy_math.h
+++ b/numpy/core/include/numpy/npy_math.h
@@ -130,6 +130,91 @@ NPY_INLINE static float __npy_nzerof(void)
#define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f
/*
+ * Constants used in vector implementation of float64 exp(x)
+ * (hexadecimal floating-point literals; consumed by AVX512F_exp_DOUBLE)
+ */
+#define NPY_RINT_CVT_MAGIC 0x1.8p52 /* 1.5 * 2^52: add, then subtract, to round a double to an integral value */
+#define NPY_INV_LN2_MUL_32 0x1.71547652b82fep+5 /* 32 / ln(2) */
+#define NPY_TANG_NEG_L1 -0x1.62e42fefp-6 /* leading bits of -ln(2)/32 */
+#define NPY_TANG_NEG_L2 -0x1.473de6af278edp-39 /* trailing bits of -ln(2)/32 (L1 + L2 ~= -ln(2)/32) */
+#define NPY_TANG_A1 0x1p-1 /* polynomial coefficients for exp(r) - 1: A1 = 1/2 */
+#define NPY_TANG_A2 0x1.5555555548f7cp-3 /* ~ 1/6 */
+#define NPY_TANG_A3 0x1.5555555545d4ep-5 /* ~ 1/24 */
+#define NPY_TANG_A4 0x1.11115b7aa905ep-7 /* ~ 1/120 */
+#define NPY_TANG_A5 0x1.6c1728d739765p-10 /* ~ 1/720 */
+
+/*
+ * Lookup table for 2^(j/32), j = 0..31: leading part of each value,
+ * stored as the raw bit pattern of a float64 (entry 0 is 1.0).
+ * The vector exp kernel loads these words as doubles and adds the
+ * matching EXP_Table_tail entry as a low-order correction.
+ */
+static npy_uint64 EXP_Table_top[32] = {
+ 0x3FF0000000000000,
+ 0x3FF059B0D3158540,
+ 0x3FF0B5586CF98900,
+ 0x3FF11301D0125B40,
+ 0x3FF172B83C7D5140,
+ 0x3FF1D4873168B980,
+ 0x3FF2387A6E756200,
+ 0x3FF29E9DF51FDEC0,
+ 0x3FF306FE0A31B700,
+ 0x3FF371A7373AA9C0,
+ 0x3FF3DEA64C123400,
+ 0x3FF44E0860618900,
+ 0x3FF4BFDAD5362A00,
+ 0x3FF5342B569D4F80,
+ 0x3FF5AB07DD485400,
+ 0x3FF6247EB03A5580,
+ 0x3FF6A09E667F3BC0,
+ 0x3FF71F75E8EC5F40,
+ 0x3FF7A11473EB0180,
+ 0x3FF82589994CCE00,
+ 0x3FF8ACE5422AA0C0,
+ 0x3FF93737B0CDC5C0,
+ 0x3FF9C49182A3F080,
+ 0x3FFA5503B23E2540,
+ 0x3FFAE89F995AD380,
+ 0x3FFB7F76F2FB5E40,
+ 0x3FFC199BDD855280,
+ 0x3FFCB720DCEF9040,
+ 0x3FFD5818DCFBA480,
+ 0x3FFDFC97337B9B40,
+ 0x3FFEA4AFA2A490C0,
+ 0x3FFF50765B6E4540,
+};
+
+static npy_uint64 EXP_Table_tail[32] = { /* low-order corrections (float64 bit patterns): the exp kernel uses top[j] + tail[j] as 2^(j/32) */
+ 0x0000000000000000,
+ 0x3D0A1D73E2A475B4,
+ 0x3CEEC5317256E308,
+ 0x3CF0A4EBBF1AED93,
+ 0x3D0D6E6FBE462876,
+ 0x3D053C02DC0144C8,
+ 0x3D0C3360FD6D8E0B,
+ 0x3D009612E8AFAD12,
+ 0x3CF52DE8D5A46306,
+ 0x3CE54E28AA05E8A9,
+ 0x3D011ADA0911F09F,
+ 0x3D068189B7A04EF8,
+ 0x3D038EA1CBD7F621,
+ 0x3CBDF0A83C49D86A,
+ 0x3D04AC64980A8C8F,
+ 0x3CD2C7C3E81BF4B7,
+ 0x3CE921165F626CDD,
+ 0x3D09EE91B8797785,
+ 0x3CDB5F54408FDB37,
+ 0x3CF28ACF88AFAB35,
+ 0x3CFB5BA7C55A192D,
+ 0x3D027A280E1F92A0,
+ 0x3CF01C7C46B071F3,
+ 0x3CFC8B424491CAF8,
+ 0x3D06AF439A68BB99,
+ 0x3CDBAA9EC206AD4F,
+ 0x3CFC2220CB12A092,
+ 0x3D048A81E5E8F4A5,
+ 0x3CDC976816BAD9B8,
+ 0x3CFEB968CAC39ED3,
+ 0x3CF9858F73A18F5E,
+ 0x3C99D3E12DD8A18B,
+};
+
+
+/*
* Constants used in vector implementation of log(x)
*/
#define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index 9b43824cb..eea82309c 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -1558,6 +1558,15 @@ FLOAT_@func@(char **args, npy_intp const *dimensions, npy_intp const *steps, voi
/**end repeat**/
+/*
+ * Scalar float64 exp inner loop: applies npy_exp to every input element
+ * visited by UNARY_LOOP and writes the result to the matching output slot.
+ */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
+{
+    UNARY_LOOP {
+        npy_double *out = (npy_double *)op1;
+        *out = npy_exp(*(npy_double *)ip1);
+    }
+}
+
/**begin repeat
* #isa = avx512f, fma#
* #ISA = AVX512F, FMA#
@@ -1688,6 +1697,17 @@ FLOAT_@func@_@isa@(char **args, npy_intp const *dimensions, npy_intp const *step
/**end repeat1**/
/**end repeat**/
+/*
+ * float64 exp inner loop for AVX512F builds: hands the call to the vector
+ * kernel when it accepts the operands, otherwise computes element by
+ * element with npy_exp.
+ */
+NPY_NO_EXPORT NPY_GCC_OPT_3 void
+DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(data))
+{
+    /* A non-zero return means the vector path already produced the output. */
+    if (run_unary_avx512f_exp_DOUBLE(args, dimensions, steps)) {
+        return;
+    }
+
+    /* Scalar fallback. */
+    UNARY_LOOP {
+        npy_double *out = (npy_double *)op1;
+        *out = npy_exp(*(npy_double *)ip1);
+    }
+}
+
/**begin repeat
* Float types
diff --git a/numpy/core/src/umath/loops.h.src b/numpy/core/src/umath/loops.h.src
index e9d0b4c62..50a7ccfee 100644
--- a/numpy/core/src/umath/loops.h.src
+++ b/numpy/core/src/umath/loops.h.src
@@ -196,6 +196,12 @@ NPY_NO_EXPORT void
/**end repeat1**/
/**end repeat**/
+NPY_NO_EXPORT void
+DOUBLE_exp(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); /* scalar float64 exp loop (npy_exp per element) */
+
+NPY_NO_EXPORT void
+DOUBLE_exp_avx512f(char **args, npy_intp const *dimensions, npy_intp const *steps, void *NPY_UNUSED(func)); /* float64 exp loop that tries the AVX512F kernel first */
+
/**begin repeat
* #func = sin, cos, exp, log#
*/
diff --git a/numpy/core/src/umath/simd.inc.src b/numpy/core/src/umath/simd.inc.src
index 4265476b5..68a1b8aa7 100644
--- a/numpy/core/src/umath/simd.inc.src
+++ b/numpy/core/src/umath/simd.inc.src
@@ -387,6 +387,23 @@ run_unary_@isa@_sincos_FLOAT(char **args, npy_intp const *dimensions, npy_intp c
/**end repeat**/
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
+/* Forward declaration of the vector kernel: (output, input, count, input stride in bytes). */
+static NPY_INLINE void
+AVX512F_exp_DOUBLE(npy_double *, npy_double *, const npy_intp n, const npy_intp stride);
+#endif
+/*
+ * Dispatch helper for the float64 exp loop.
+ *
+ * Returns 1 when the AVX512F kernel was run on the operands, 0 when the
+ * caller has to fall back to the scalar loop (kernel not compiled in, or
+ * IS_OUTPUT_BLOCKABLE_UNARY rejected the operands).
+ */
+static NPY_INLINE int
+run_unary_avx512f_exp_DOUBLE(char **args, npy_intp const *dimensions, npy_intp const *steps)
+{
+    int handled = 0;
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS && defined NPY_HAVE_SSE2_INTRINSICS
+    if (IS_OUTPUT_BLOCKABLE_UNARY(sizeof(npy_double), 64)) {
+        /* args[1] is the output buffer, args[0] the input buffer. */
+        AVX512F_exp_DOUBLE((npy_double*)args[1], (npy_double*)args[0], dimensions[0], steps[0]);
+        handled = 1;
+    }
+#endif
+    return handled;
+}
/**begin repeat
* Float types
@@ -1695,6 +1712,22 @@ avx512_scalef_ps(__m512 poly, __m512 quadrant)
{
return _mm512_scalef_ps(poly, quadrant);
}
+
+/*
+ * 32-entry table lookup spread over four 8-lane registers.
+ *
+ * t0..t3 hold table entries 0-7, 8-15, 16-23 and 24-31; each 64-bit lane
+ * of `index` selects one entry. Two two-register permutes are done
+ * (t0/t1 and t2/t3) and lanes whose index is greater than 15 take the
+ * result of the second one.
+ */
+static NPY_INLINE NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F __m512d
+avx512_permute_x4var_pd(__m512d t0,
+                        __m512d t1,
+                        __m512d t2,
+                        __m512d t3,
+                        __m512i index)
+{
+    const __m512i last_low_entry = _mm512_set1_epi64(15);
+    __m512d from_low = _mm512_permutex2var_pd(t0, index, t1);
+    __m512d from_high = _mm512_permutex2var_pd(t2, index, t3);
+    __mmask8 pick_high = _mm512_cmp_epi64_mask(index, last_low_entry,
+                                               _MM_CMPINT_GT);
+    return _mm512_mask_blend_pd(pick_high, from_low, from_high);
+}
+
/**begin repeat
* #vsub = ps, pd#
* #type= npy_float, npy_double#
@@ -2654,6 +2687,154 @@ static NPY_GCC_OPT_3 NPY_GCC_TARGET_@ISA@ void
#endif
/**end repeat**/
+/*
+ * Vectorized implementation of exp double using AVX512
+ * Reference: Tang, P.T.P., "Table-driven implementation of the
+ *  exponential function in IEEE floating-point
+ *  arithmetic," ACM Transactions on Mathematical
+ *  Software, vol. 15, pp. 144-157, 1989.
+ * 1) if x > mTH_max or x is INF; return INF (overflow)
+ * 2) if x < mTH_min; return 0.0 (underflow)
+ * 3) if abs(x) < mTH_nearzero; return 1.0 + x
+ * 4) Range reduction:
+ *    x = (32m + j)ln2 / 32 + r; r in [-ln2/64, ln2/64]
+ * 5) exp(r) - 1 is approximated by a polynomial function p(r)
+ *    exp(x) = 2^m(2^(j/32) + 2^(j/32)p(r));
+ *
+ * Parameters:
+ *   op         - output buffer, written contiguously (8 doubles per step)
+ *   ip         - input buffer
+ *   array_size - number of elements to process
+ *   steps      - input stride in bytes (may be negative)
+ *
+ * NaN inputs produce NaN. The overflow floating-point status flag is
+ * raised once at the end if any finite input exceeded mTH_max.
+ */
+#if defined HAVE_ATTRIBUTE_TARGET_AVX512F_WITH_INTRINSICS
+static NPY_GCC_OPT_3 NPY_GCC_TARGET_AVX512F void
+AVX512F_exp_DOUBLE(npy_double * op,
+                   npy_double * ip,
+                   const npy_intp array_size,
+                   const npy_intp steps)
+{
+    npy_intp num_remaining_elements = array_size;
+    /*
+     * sizeof yields an unsigned size_t; without the cast a negative byte
+     * stride would be converted to a huge unsigned value before the
+     * division. Keep the arithmetic signed.
+     */
+    const npy_intp stride = steps / (npy_intp)sizeof(npy_double);
+    const npy_int num_lanes = 64 / sizeof(npy_double);
+    /*
+     * Per-lane element offsets for the gather path.
+     * NOTE(review): offsets are narrowed to 32-bit npy_int; element
+     * strides whose multiples do not fit are not handled here - confirm
+     * the caller restricts the stride range.
+     */
+    npy_int indexarr[8];
+    for (npy_int ii = 0; ii < 8; ii++) {
+        indexarr[ii] = ii*stride;
+    }
+
+    __m512d InvLn2N = _mm512_set1_pd(NPY_INV_LN2_MUL_32);
+    __m512d mShift = _mm512_set1_pd(NPY_RINT_CVT_MAGIC);
+    __m512d mNegL1 = _mm512_set1_pd(NPY_TANG_NEG_L1);
+    __m512d mNegL2 = _mm512_set1_pd(NPY_TANG_NEG_L2);
+    __m512i mMod = _mm512_set1_epi64(0x1f);
+    __m512d mA1 = _mm512_set1_pd(NPY_TANG_A1);
+    __m512d mA2 = _mm512_set1_pd(NPY_TANG_A2);
+    __m512d mA3 = _mm512_set1_pd(NPY_TANG_A3);
+    __m512d mA4 = _mm512_set1_pd(NPY_TANG_A4);
+    __m512d mA5 = _mm512_set1_pd(NPY_TANG_A5);
+    __m512d mTH_nearzero = _mm512_set1_pd(0x1p-54);
+    __m512d mTH_max = _mm512_set1_pd(0x1.62e42fefa39efp+9);
+    __m512d mTH_min = _mm512_set1_pd(-0x1.74910d52d3053p+9);
+    __m512d mTH_inf = _mm512_set1_pd(NPY_INFINITY);
+    __m512d zeros_d = _mm512_set1_pd(0.0);
+    __m512d ones_d = _mm512_set1_pd(1.0);
+    __m256i vindex = _mm256_loadu_si256((__m256i*)&indexarr[0]);
+
+    /* The 32-entry tables are kept in four 8-lane registers each. */
+    __m512d mTable_top_0 = _mm512_loadu_pd(&(EXP_Table_top[0]));
+    __m512d mTable_top_1 = _mm512_loadu_pd(&(EXP_Table_top[8]));
+    __m512d mTable_top_2 = _mm512_loadu_pd(&(EXP_Table_top[8*2]));
+    __m512d mTable_top_3 = _mm512_loadu_pd(&(EXP_Table_top[8*3]));
+    __m512d mTable_tail_0 = _mm512_loadu_pd(&(EXP_Table_tail[0]));
+    __m512d mTable_tail_1 = _mm512_loadu_pd(&(EXP_Table_tail[8]));
+    __m512d mTable_tail_2 = _mm512_loadu_pd(&(EXP_Table_tail[8*2]));
+    __m512d mTable_tail_3 = _mm512_loadu_pd(&(EXP_Table_tail[8*3]));
+
+    __mmask8 overflow_mask = avx512_get_partial_load_mask_pd(0, num_lanes);
+    __mmask8 load_mask = avx512_get_full_load_mask_pd();
+    __mmask8 xmin_mask, xmax_mask, inf_mask, nan_mask, nearzero_mask;
+
+    while (num_remaining_elements > 0) {
+        /* Last, partially filled vector: only touch the remaining lanes. */
+        if (num_remaining_elements < num_lanes) {
+            load_mask = avx512_get_partial_load_mask_pd(num_remaining_elements,
+                                                         num_lanes);
+        }
+
+        __m512d x;
+        if (1 == stride) {
+            x = avx512_masked_load_pd(load_mask, ip);
+        }
+        else {
+            x = avx512_masked_gather_pd(zeros_d, ip, vindex, load_mask);
+        }
+
+        /* Classify special inputs; NaN lanes are zeroed before comparing. */
+        nan_mask = _mm512_cmp_pd_mask(x, x, _CMP_NEQ_UQ);
+        x = avx512_set_masked_lanes_pd(x, zeros_d, nan_mask);
+        xmax_mask = _mm512_cmp_pd_mask(x, mTH_max, _CMP_GT_OQ);
+        xmin_mask = _mm512_cmp_pd_mask(x, mTH_min, _CMP_LT_OQ);
+        inf_mask = _mm512_cmp_pd_mask(x, mTH_inf, _CMP_EQ_OQ);
+        __m512i x_abs = _mm512_and_epi64(_mm512_castpd_si512(x),
+                                _mm512_set1_epi64(0x7FFFFFFFFFFFFFFF));
+        nearzero_mask = _mm512_cmp_pd_mask(_mm512_castsi512_pd(x_abs),
+                                    mTH_nearzero, _CMP_LT_OQ);
+        /* Zeroed NaN lanes compare as near zero; take them out again. */
+        nearzero_mask = _mm512_kxor(nearzero_mask, nan_mask);
+        /* +inf input returns inf but is not reported as an overflow. */
+        overflow_mask = _mm512_kor(overflow_mask,
+                                _mm512_kxor(xmax_mask, inf_mask));
+        /* Special lanes run through the main path with x = 0. */
+        x = avx512_set_masked_lanes_pd(x, zeros_d,
+                        _mm512_kor(_mm512_kor(nan_mask, xmin_mask),
+                            _mm512_kor(xmax_mask, nearzero_mask)));
+
+        /* z = x * 32/ln2 */
+        __m512d z = _mm512_mul_pd(x, InvLn2N);
+
+        /* round to nearest */
+        __m512d kd = _mm512_add_pd(z, mShift);
+        __m512i ki = _mm512_castpd_si512(kd);
+        kd = _mm512_sub_pd(kd, mShift);
+
+        /* r = (x + kd*mNegL1) + kd*mNegL2 */
+        __m512d r1 = _mm512_fmadd_pd(kd, mNegL1, x);
+        __m512d r2 = _mm512_mul_pd(kd, mNegL2);
+        __m512d r = _mm512_add_pd(r1,r2);
+
+        /* Polynomial approximation for exp(r) - 1 */
+        __m512d q = _mm512_fmadd_pd(mA5, r, mA4);
+        q = _mm512_fmadd_pd(q, r, mA3);
+        q = _mm512_fmadd_pd(q, r, mA2);
+        q = _mm512_fmadd_pd(q, r, mA1);
+        q = _mm512_mul_pd(q, r);
+        __m512d p = _mm512_fmadd_pd(r, q, r2);
+        p = _mm512_add_pd(r1, p);
+
+        /* Get 2^(j/32) from lookup table */
+        __m512i j = _mm512_and_epi64(ki, mMod);
+        __m512d top = avx512_permute_x4var_pd(mTable_top_0, mTable_top_1,
+                                  mTable_top_2, mTable_top_3, j);
+        __m512d tail = avx512_permute_x4var_pd(mTable_tail_0, mTable_tail_1,
+                                  mTable_tail_2, mTable_tail_3, j);
+
+        /*
+         * s = top + tail;
+         * exp(x) = 2^m * (top + (tail + s * p));
+         */
+        __m512d s = _mm512_add_pd(top, tail);
+        __m512d res = _mm512_fmadd_pd(s, p, tail);
+        res = _mm512_add_pd(res, top);
+        res = _mm512_scalef_pd(res, _mm512_div_pd(kd, _mm512_set1_pd(32)));
+
+        /* return special cases */
+        res = avx512_set_masked_lanes_pd(res, _mm512_add_pd(x, ones_d),
+                                        nearzero_mask);
+        res = avx512_set_masked_lanes_pd(res, _mm512_set1_pd(NPY_NAN),
+                                        nan_mask);
+        res = avx512_set_masked_lanes_pd(res, mTH_inf, xmax_mask);
+        res = avx512_set_masked_lanes_pd(res, zeros_d, xmin_mask);
+
+        _mm512_mask_storeu_pd(op, load_mask, res);
+
+        ip += num_lanes * stride;
+        op += num_lanes;
+        num_remaining_elements -= num_lanes;
+    }
+    if (overflow_mask) {
+        npy_set_floatstatus_overflow();
+    }
+}
+#endif
+
/**begin repeat
* #TYPE = CFLOAT, CDOUBLE#
* #type = npy_float, npy_double#
diff --git a/numpy/core/tests/data/umath-validation-set-exp b/numpy/core/tests/data/umath-validation-set-exp
index 1b2cc9ce4..58bba300e 100644
--- a/numpy/core/tests/data/umath-validation-set-exp
+++ b/numpy/core/tests/data/umath-validation-set-exp
@@ -133,3 +133,111 @@ np.float32,0xc29b43d5,0x077ffffc,3
np.float32,0xc1e61ff7,0x2ab504f5,3
np.float32,0xc2867878,0x0effff15,3
np.float32,0xc2a2324a,0x04fffff4,3
+#float64
+#near zero
+np.float64,0x8000000000000000, 0x3ff0000000000000,1
+np.float64,0x8010000000000000, 0x3ff0000000000000,1
+np.float64,0x8000000000000001, 0x3ff0000000000000,1
+np.float64,0x8360000000000000, 0x3ff0000000000000,1
+np.float64,0x9a70000000000000, 0x3ff0000000000000,1
+np.float64,0xb9b0000000000000, 0x3ff0000000000000,1
+np.float64,0xb810000000000000, 0x3ff0000000000000,1
+np.float64,0xbc30000000000000, 0x3ff0000000000000,1
+np.float64,0xb6a0000000000000, 0x3ff0000000000000,1
+np.float64,0x0000000000000000, 0x3ff0000000000000,1
+np.float64,0x0010000000000000, 0x3ff0000000000000,1
+np.float64,0x0000000000000001, 0x3ff0000000000000,1
+np.float64,0x0360000000000000, 0x3ff0000000000000,1
+np.float64,0x1a70000000000000, 0x3ff0000000000000,1
+np.float64,0x3c30000000000000, 0x3ff0000000000000,1
+np.float64,0x36a0000000000000, 0x3ff0000000000000,1
+np.float64,0x39b0000000000000, 0x3ff0000000000000,1
+np.float64,0x3810000000000000, 0x3ff0000000000000,1
+#underflow
+np.float64,0xc0c6276800000000, 0x0000000000000000,1
+np.float64,0xc0c62d918ce2421d, 0x0000000000000000,1
+np.float64,0xc0c62d918ce2421e, 0x0000000000000000,1
+np.float64,0xc0c62d91a0000000, 0x0000000000000000,1
+np.float64,0xc0c62d9180000000, 0x0000000000000000,1
+np.float64,0xc0c62dea45ee3e06, 0x0000000000000000,1
+np.float64,0xc0c62dea45ee3e07, 0x0000000000000000,1
+np.float64,0xc0c62dea40000000, 0x0000000000000000,1
+np.float64,0xc0c62dea60000000, 0x0000000000000000,1
+np.float64,0xc087438520000000, 0x0000000000000001,1
+np.float64,0xc08743853f2f4461, 0x0000000000000001,1
+np.float64,0xc08743853f2f4460, 0x0000000000000001,1
+np.float64,0xc087438540000000, 0x0000000000000001,1
+np.float64,0xc0875f1120000000, 0x0000000000000000,1
+np.float64,0xc0875f113c30b1c8, 0x0000000000000000,1
+np.float64,0xc0875f1140000000, 0x0000000000000000,1
+np.float64,0xc093480000000000, 0x0000000000000000,1
+np.float64,0xffefffffffffffff, 0x0000000000000000,1
+np.float64,0xc7efffffe0000000, 0x0000000000000000,1
+#between -745.13321910 and 709.78271289
+np.float64,0xbff760cd14774bd9, 0x3fcdb14ced00ceb6,1
+np.float64,0xbff760cd20000000, 0x3fcdb14cd7993879,1
+np.float64,0xbff760cd00000000, 0x3fcdb14d12fbd264,1
+np.float64,0xc07f1cf360000000, 0x130c1b369af14fda,1
+np.float64,0xbeb0000000000000, 0x3feffffe00001000,1
+np.float64,0xbd70000000000000, 0x3fefffffffffe000,1
+np.float64,0xc084fd46e5c84952, 0x0360000000000139,1
+np.float64,0xc084fd46e5c84953, 0x035ffffffffffe71,1
+np.float64,0xc084fd46e0000000, 0x0360000b9096d32c,1
+np.float64,0xc084fd4700000000, 0x035fff9721d12104,1
+np.float64,0xc086232bc0000000, 0x0010003af5e64635,1
+np.float64,0xc086232bdd7abcd2, 0x001000000000007c,1
+np.float64,0xc086232bdd7abcd3, 0x000ffffffffffe7c,1
+np.float64,0xc086232be0000000, 0x000ffffaf57a6fc9,1
+np.float64,0xc086233920000000, 0x000fe590e3b45eb0,1
+np.float64,0xc086233938000000, 0x000fe56133493c57,1
+np.float64,0xc086233940000000, 0x000fe5514deffbbc,1
+np.float64,0xc086234c98000000, 0x000fbf1024c32ccb,1
+np.float64,0xc086234ca0000000, 0x000fbf0065bae78d,1
+np.float64,0xc086234c80000000, 0x000fbf3f623a7724,1
+np.float64,0xc086234ec0000000, 0x000fbad237c846f9,1
+np.float64,0xc086234ec8000000, 0x000fbac27cfdec97,1
+np.float64,0xc086234ee0000000, 0x000fba934cfd3dc2,1
+np.float64,0xc086234ef0000000, 0x000fba73d7f618d9,1
+np.float64,0xc086234f00000000, 0x000fba54632dddbf,1
+np.float64,0xc0862356e0000000, 0x000faae0945b761a,1
+np.float64,0xc0862356f0000000, 0x000faac13eb9a310,1
+np.float64,0xc086235700000000, 0x000faaa1e9567b0a,1
+np.float64,0xc086236020000000, 0x000f98cd75c11ed7,1
+np.float64,0xc086236ca0000000, 0x000f8081b4d93f89,1
+np.float64,0xc086236cb0000000, 0x000f8062b3f4d6c5,1
+np.float64,0xc086236cc0000000, 0x000f8043b34e6f8c,1
+np.float64,0xc086238d98000000, 0x000f41220d9b0d2c,1
+np.float64,0xc086238da0000000, 0x000f4112cc80a01f,1
+np.float64,0xc086238d80000000, 0x000f414fd145db5b,1
+np.float64,0xc08624fd00000000, 0x000cbfce8ea1e6c4,1
+np.float64,0xc086256080000000, 0x000c250747fcd46e,1
+np.float64,0xc08626c480000000, 0x000a34f4bd975193,1
+np.float64,0xbf50000000000000, 0x3feff800ffeaac00,1
+np.float64,0xbe10000000000000, 0x3fefffffff800000,1
+np.float64,0xbcd0000000000000, 0x3feffffffffffff8,1
+np.float64,0xc055d589e0000000, 0x38100004bf94f63e,1
+np.float64,0xc055d58a00000000, 0x380ffff97f292ce8,1
+np.float64,0xbfd962d900000000, 0x3fe585a4b00110e1,1
+np.float64,0x3ff4bed280000000, 0x400d411e7a58a303,1
+np.float64,0x3fff0b3620000000, 0x401bd7737ffffcf3,1
+np.float64,0x3ff0000000000000, 0x4005bf0a8b145769,1
+np.float64,0x3eb0000000000000, 0x3ff0000100000800,1
+np.float64,0x3d70000000000000, 0x3ff0000000001000,1
+np.float64,0x40862e42e0000000, 0x7fefff841808287f,1
+np.float64,0x40862e42fefa39ef, 0x7fefffffffffff2a,1
+np.float64,0x40862e0000000000, 0x7feef85a11e73f2d,1
+np.float64,0x4000000000000000, 0x401d8e64b8d4ddae,1
+np.float64,0x4009242920000000, 0x40372a52c383a488,1
+np.float64,0x4049000000000000, 0x44719103e4080b45,1
+np.float64,0x4008000000000000, 0x403415e5bf6fb106,1
+np.float64,0x3f50000000000000, 0x3ff00400800aab55,1
+np.float64,0x3e10000000000000, 0x3ff0000000400000,1
+np.float64,0x3cd0000000000000, 0x3ff0000000000004,1
+np.float64,0x40562e40a0000000, 0x47effed088821c3f,1
+np.float64,0x40562e42e0000000, 0x47effff082e6c7ff,1
+np.float64,0x40562e4300000000, 0x47f00000417184b8,1
+np.float64,0x3fe8000000000000, 0x4000ef9db467dcf8,1
+np.float64,0x402b12e8d4f33589, 0x412718f68c71a6fe,1
+np.float64,0x402b12e8d4f3358a, 0x412718f68c71a70a,1
+np.float64,0x402b12e8c0000000, 0x412718f59a7f472e,1
+np.float64,0x402b12e8e0000000, 0x412718f70c0eac62,1
diff --git a/numpy/core/tests/test_umath_accuracy.py b/numpy/core/tests/test_umath_accuracy.py
index fd7214396..e3c2eb025 100644
--- a/numpy/core/tests/test_umath_accuracy.py
+++ b/numpy/core/tests/test_umath_accuracy.py
@@ -3,7 +3,7 @@ import platform
from os import path
import sys
import pytest
-from ctypes import c_float, c_int, cast, pointer, POINTER
+from ctypes import c_longlong, c_double, c_float, c_int, cast, pointer, POINTER
from numpy.testing import assert_array_max_ulp
from numpy.core._multiarray_umath import __cpu_features__
@@ -16,10 +16,15 @@ platform_skip = pytest.mark.skipif(not runtest,
# convert string to hex function taken from:
# https://stackoverflow.com/questions/1592158/convert-hex-to-float #
-def convert(s):
+def convert(s, datatype="np.float32"): # datatype: type tag from the data file; "np.float64" selects the double path
i = int(s, 16) # convert from hex to a Python int
- cp = pointer(c_int(i)) # make this into a c integer
- fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer
+ if (datatype == "np.float64"): # anything else is treated as a float32 bit pattern
+ cp = pointer(c_longlong(i)) # make this into a c long long integer (ctypes does no overflow checking, so sign-bit patterns keep their bits)
+ fp = cast(cp, POINTER(c_double)) # cast the int pointer to a double pointer
+ else:
+ cp = pointer(c_int(i)) # make this into a c integer
+ fp = cast(cp, POINTER(c_float)) # cast the int pointer to a float pointer
+
return fp.contents.value # dereference the pointer, get the float
str_to_float = np.vectorize(convert)
@@ -45,8 +50,8 @@ class TestAccuracy:
npfunc = getattr(np, filename.split('-')[3])
for datatype in np.unique(data['type']):
data_subset = data[data['type'] == datatype]
- inval = np.array(str_to_float(data_subset['input'].astype(str)), dtype=eval(datatype))
- outval = np.array(str_to_float(data_subset['output'].astype(str)), dtype=eval(datatype))
+ inval = np.array(str_to_float(data_subset['input'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype))
+ outval = np.array(str_to_float(data_subset['output'].astype(str), data_subset['type'].astype(str)), dtype=eval(datatype))
perm = np.random.permutation(len(inval))
inval = inval[perm]
outval = outval[perm]