diff options
author | HowJmay <yuanyanghau@gmail.com> | 2021-04-28 01:35:59 +0800 |
---|---|---|
committer | HowJmay <yuanyanghau@gmail.com> | 2021-05-01 01:40:02 +0800 |
commit | 18cb9b43e846ff1b42ce0b19d8909bd1be03773e (patch) | |
tree | 18a572aeeee88e418d108f66faafb46908a3a159 | |
parent | 967274c4cb8b67d4ecc0fbe2f0e17359cc6e1c97 (diff) | |
download | numpy-18cb9b43e846ff1b42ce0b19d8909bd1be03773e.tar.gz |
ENH: Add max values comparison for floating point
SIMD intrinsics for comparing double/single precision values have been
added. For now, only the NEON and SSE ones are added.
-rw-r--r-- | numpy/core/src/_simd/_simd.dispatch.c.src | 10 | ||||
-rw-r--r-- | numpy/core/src/common/simd/avx2/math.h | 19 | ||||
-rw-r--r-- | numpy/core/src/common/simd/avx512/math.h | 17 | ||||
-rw-r--r-- | numpy/core/src/common/simd/neon/math.h | 20 | ||||
-rw-r--r-- | numpy/core/src/common/simd/sse/math.h | 19 | ||||
-rw-r--r-- | numpy/core/src/common/simd/vsx/math.h | 9 | ||||
-rw-r--r-- | numpy/core/tests/test_simd.py | 30 |
7 files changed, 124 insertions, 0 deletions
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index 4acd20a75..d5984009e 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -387,6 +387,11 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@) /**end repeat1**/ #endif +#if @fp_only@ +SIMD_IMPL_INTRIN_2(max_@sfx@, v@sfx@, v@sfx@, v@sfx@) +SIMD_IMPL_INTRIN_2(maxp_@sfx@, v@sfx@, v@sfx@, v@sfx@) +#endif + /*************************** * Mask operations ***************************/ @@ -607,6 +612,11 @@ SIMD_INTRIN_DEF(@intrin@_@sfx@) /**end repeat1**/ #endif +#if @fp_only@ +SIMD_INTRIN_DEF(max_@sfx@) +SIMD_INTRIN_DEF(maxp_@sfx@) +#endif + /*************************** * Mask operations ***************************/ diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h index b3eba6f5f..84ab024d0 100644 --- a/numpy/core/src/common/simd/avx2/math.h +++ b/numpy/core/src/common/simd/avx2/math.h @@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a) NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) { return _mm256_mul_pd(a, a); } +// Maximum, natively mapping with no guarantees to handle NaN. +#define npyv_max_f32 _mm256_max_ps +#define npyv_max_f64 _mm256_max_pd +// Maximum, supports IEEE floating-point arithmetic (IEC 60559), +// - If one of the two vectors contains NaN, the equivalent element of the other vector is set +// - Only if both corresponded elements are NaN, NaN is set. 
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) +{ + __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q); + __m256 max = _mm256_max_ps(a, b); + return _mm256_blendv_ps(a, max, nn); +} +NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b) +{ + __m256d nn = _mm256_cmp_pd(b, b, _CMP_ORD_Q); + __m256d max = _mm256_max_pd(a, b); + return _mm256_blendv_pd(a, max, nn); +} + #endif diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h index 1db710670..668362298 100644 --- a/numpy/core/src/common/simd/avx512/math.h +++ b/numpy/core/src/common/simd/avx512/math.h @@ -46,4 +46,21 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a) NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) { return _mm512_mul_pd(a, a); } +// Maximum, natively mapping with no guarantees to handle NaN. +#define npyv_max_f32 _mm512_max_ps +#define npyv_max_f64 _mm512_max_pd +// Maximum, supports IEEE floating-point arithmetic (IEC 60559), +// - If one of the two vectors contains NaN, the equivalent element of the other vector is set +// - Only if both corresponded elements are NaN, NaN is set. +NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) +{ + __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q); + return _mm512_mask_max_ps(a, nn, a, b); +} +NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b) +{ + __mmask8 nn = _mm512_cmp_pd_mask(b, b, _CMP_ORD_Q); + return _mm512_mask_max_pd(a, nn, a, b); +} + #endif diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h index a2bbdf2a5..09b7bbc9e 100644 --- a/numpy/core/src/common/simd/neon/math.h +++ b/numpy/core/src/common/simd/neon/math.h @@ -83,4 +83,24 @@ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a) } #endif // NPY_SIMD_F64 +// Maximum, natively mapping with no guarantees to handle NaN. 
+#define npyv_max_f32 vmaxq_f32 +#define npyv_max_f64 vmaxq_f64 +// Maximum, supports IEEE floating-point arithmetic (IEC 60559), +// - If one of the two vectors contains NaN, the equivalent element of the other vector is set +// - Only if both corresponded elements are NaN, NaN is set. +#ifdef NPY_HAVE_ASIMD + #define npyv_maxp_f32 vmaxnmq_f32 +#else + NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) + { + npyv_u32 nn_a = vceqq_f32(a, a); + npyv_u32 nn_b = vceqq_f32(b, b); + return vmaxq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a)); + } +#endif +#if NPY_SIMD_F64 + #define npyv_maxp_f64 vmaxnmq_f64 +#endif // NPY_SIMD_F64 + #endif // _NPY_SIMD_SSE_MATH_H diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h index b7203cd89..15e9f7e44 100644 --- a/numpy/core/src/common/simd/sse/math.h +++ b/numpy/core/src/common/simd/sse/math.h @@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a) NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) { return _mm_mul_pd(a, a); } +// Maximum, natively mapping with no guarantees to handle NaN. +#define npyv_max_f32 _mm_max_ps +#define npyv_max_f64 _mm_max_pd +// Maximum, supports IEEE floating-point arithmetic (IEC 60559), +// - If one of the two vectors contains NaN, the equivalent element of the other vector is set +// - Only if both corresponded elements are NaN, NaN is set. 
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b) +{ + __m128 nn = _mm_cmpord_ps(b, b); + __m128 max = _mm_max_ps(a, b); + return npyv_select_f32(_mm_castps_si128(nn), max, a); +} +NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b) +{ + __m128d nn = _mm_cmpord_pd(b, b); + __m128d max = _mm_max_pd(a, b); + return npyv_select_f64(_mm_castpd_si128(nn), max, a); +} + #endif diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h index 7c8610b19..11bacd703 100644 --- a/numpy/core/src/common/simd/vsx/math.h +++ b/numpy/core/src/common/simd/vsx/math.h @@ -33,4 +33,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a) NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a) { return vec_mul(a, a); } +// Maximum, natively mapping with no guarantees to handle NaN. +#define npyv_max_f32 vec_max +#define npyv_max_f64 vec_max +// Maximum, supports IEEE floating-point arithmetic (IEC 60559), +// - If one of the two vectors contains NaN, the equivalent element of the other vector is set +// - Only if both corresponded elements are NaN, NaN is set. 
+#define npyv_maxp_f32 vec_max +#define npyv_maxp_f64 vec_max + #endif // _NPY_SIMD_VSX_MATH_H diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index 8306a86d3..fb7dd88cf 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -315,6 +315,36 @@ class _SIMD_FP(_Test_Utility): data_square = [x*x for x in data] square = self.square(vdata) assert square == data_square + + def test_max(self): + """ + Test intrinics: + npyv_max_##SFX + npyv_maxp_##SFX + """ + data_a = self._data() + data_b = self._data(self.nlanes) + vdata_a, vdata_b = self.load(data_a), self.load(data_b) + data_max = [max(a, b) for a, b in zip(data_a, data_b)] + _max = self.max(vdata_a, vdata_b) + assert _max == data_max + maxp = self.maxp(vdata_a, vdata_b) + assert maxp == data_max + # test IEEE standards + pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() + max_cases = ((nan, nan, nan), (nan, 10, 10), (10, nan, 10), + (pinf, pinf, pinf), (pinf, 10, pinf), (10, pinf, pinf), + (ninf, ninf, ninf), (ninf, 10, 10), (10, ninf, 10)) + for case_operand1, case_operand2, desired in max_cases: + data_max = [desired]*self.nlanes + vdata_a = self.setall(case_operand1) + vdata_b = self.setall(case_operand2) + maxp = self.maxp(vdata_a, vdata_b) + assert maxp == pytest.approx(data_max, nan_ok=True) + if nan in (case_operand1, case_operand2, desired): + continue + _max = self.max(vdata_a, vdata_b) + assert _max == data_max def test_reciprocal(self): pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan() |