diff options
author | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-03-05 09:13:55 -0800 |
---|---|---|
committer | Raghuveer Devulapalli <raghuveer.devulapalli@intel.com> | 2019-04-19 10:47:15 -0700 |
commit | 9754a207828f377654c79873e38d475bb87d98de (patch) | |
tree | 6512d0febf26593ac946722d9c38ca57a4bbbdfb /numpy/core/include | |
parent | 31e71d7ce8d447cb74b9fb83875361cf7dba4579 (diff) | |
download | numpy-9754a207828f377654c79873e38d475bb87d98de.tar.gz |
ENH: vectorizing float32 implementation of np.exp & np.log
This commit implements vectorized single precision exponential and
natural log using AVX2 and AVX512.
Accuracy:
| Function | Max ULP Error | Max Relative Error |
|----------|---------------|--------------------|
| np.exp | 2.52 | 2.1E-07 |
| np.log | 3.83 | 2.4E-07 |
Performance:
(1) Micro-benchmarks: we measured the execution time of np.exp and np.log using
the timeit package in Python. Each function was executed 1000 times and this
was repeated 100 times. The standard deviation for all the runs was less
than 2% of their mean value and is hence not included in the data. The
vectorized implementation was up to 7.6x faster than the scalar version.
| Function | NumPy1.16 | AVX2 | AVX512 | AVX2 speedup | AVX512 speedup |
| -------- | --------- | ------ | ------ | ------------ | -------------- |
| np.exp | 0.395s | 0.112s | 0.055s | 3.56x | 7.25x |
| np.log | 0.456s | 0.147s | 0.059s | 3.10x | 7.64x |
(2) Logistic regression: exp and log are heavily used in training neural
networks (as part of the sigmoid activation function and the loss function,
respectively). This patch significantly speeds up training a logistic
regression model. As an example, we measured how much time it takes to
train a model with 15 features using 1000 training data points. We
observed up to a 2x speedup in training the model to a loss function
error < 10E-04.
| Function | NumPy1.16 | AVX2 | AVX512 | AVX2 speedup | AVX512 speedup |
| -------------- | ---------- | ------ | ------ | ------------ | -------------- |
| logistic.train | 121.0s | 75.02s | 60.60s | 1.61x | 2.02x |
Diffstat (limited to 'numpy/core/include')
-rw-r--r-- | numpy/core/include/numpy/npy_common.h | 13 | ||||
-rw-r--r-- | numpy/core/include/numpy/npy_math.h | 32 |
2 files changed, 45 insertions, 0 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index 64aaaacff..d83080160 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -50,6 +50,12 @@ #define NPY_GCC_TARGET_AVX2 #endif +#if defined HAVE_ATTRIBUTE_TARGET_AVX512F && defined HAVE_LINK_AVX512F +#define NPY_GCC_TARGET_AVX512F __attribute__((target("avx512f"))) +#else +#define NPY_GCC_TARGET_AVX512F +#endif + /* * mark an argument (starting from 1) that must not be NULL and is not checked * DO NOT USE IF FUNCTION CHECKS FOR NULL!! the compiler will remove the check @@ -68,6 +74,13 @@ #define NPY_HAVE_SSE2_INTRINSICS #endif +#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX2 +#define NPY_HAVE_AVX2_INTRINSICS +#endif + +#if defined HAVE_IMMINTRIN_H && defined HAVE_LINK_AVX512F +#define NPY_HAVE_AVX512F_INTRINSICS +#endif /* * give a hint to the compiler which branch is more likely or unlikely * to occur, e.g. rare error cases: diff --git a/numpy/core/include/numpy/npy_math.h b/numpy/core/include/numpy/npy_math.h index 582390cdc..dfb8ff526 100644 --- a/numpy/core/include/numpy/npy_math.h +++ b/numpy/core/include/numpy/npy_math.h @@ -113,6 +113,38 @@ NPY_INLINE static float __npy_nzerof(void) #define NPY_SQRT2l 1.414213562373095048801688724209698079L /* sqrt(2) */ #define NPY_SQRT1_2l 0.707106781186547524400844362104849039L /* 1/sqrt(2) */ +/* + * Constants used in vector implementation of exp(x) + */ +#define NPY_RINT_CVT_MAGICf 0x1.800000p+23f +#define NPY_CODY_WAITE_LOGE_2_HIGHf -6.93145752e-1f +#define NPY_CODY_WAITE_LOGE_2_LOWf -1.42860677e-6f +#define NPY_COEFF_P0_EXPf 9.999999999980870924916e-01f +#define NPY_COEFF_P1_EXPf 7.257664613233124478488e-01f +#define NPY_COEFF_P2_EXPf 2.473615434895520810817e-01f +#define NPY_COEFF_P3_EXPf 5.114512081637298353406e-02f +#define NPY_COEFF_P4_EXPf 6.757896990527504603057e-03f +#define NPY_COEFF_P5_EXPf 5.082762527590693718096e-04f +#define NPY_COEFF_Q0_EXPf 
1.000000000000000000000e+00f +#define NPY_COEFF_Q1_EXPf -2.742335390411667452936e-01f +#define NPY_COEFF_Q2_EXPf 2.159509375685829852307e-02f + +/* + * Constants used in vector implementation of log(x) + */ +#define NPY_COEFF_P0_LOGf 0.000000000000000000000e+00f +#define NPY_COEFF_P1_LOGf 9.999999999999998702752e-01f +#define NPY_COEFF_P2_LOGf 2.112677543073053063722e+00f +#define NPY_COEFF_P3_LOGf 1.480000633576506585156e+00f +#define NPY_COEFF_P4_LOGf 3.808837741388407920751e-01f +#define NPY_COEFF_P5_LOGf 2.589979117907922693523e-02f +#define NPY_COEFF_Q0_LOGf 1.000000000000000000000e+00f +#define NPY_COEFF_Q1_LOGf 2.612677543073109236779e+00f +#define NPY_COEFF_Q2_LOGf 2.453006071784736363091e+00f +#define NPY_COEFF_Q3_LOGf 9.864942958519418960339e-01f +#define NPY_COEFF_Q4_LOGf 1.546476374983906719538e-01f +#define NPY_COEFF_Q5_LOGf 5.875095403124574342950e-03f + /* * C99 double math funcs */ |