summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorHowJmay <yuanyanghau@gmail.com>2021-04-28 01:35:59 +0800
committerHowJmay <yuanyanghau@gmail.com>2021-05-01 01:40:02 +0800
commit18cb9b43e846ff1b42ce0b19d8909bd1be03773e (patch)
tree18a572aeeee88e418d108f66faafb46908a3a159 /numpy
parent967274c4cb8b67d4ecc0fbe2f0e17359cc6e1c97 (diff)
downloadnumpy-18cb9b43e846ff1b42ce0b19d8909bd1be03773e.tar.gz
ENH: Add max values comparison for floating point
SIMD intrinsics for comparing double/single precision values have been added. Currently, only the NEON and SSE ones are added.
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src10
-rw-r--r--numpy/core/src/common/simd/avx2/math.h19
-rw-r--r--numpy/core/src/common/simd/avx512/math.h17
-rw-r--r--numpy/core/src/common/simd/neon/math.h20
-rw-r--r--numpy/core/src/common/simd/sse/math.h19
-rw-r--r--numpy/core/src/common/simd/vsx/math.h9
-rw-r--r--numpy/core/tests/test_simd.py30
7 files changed, 124 insertions, 0 deletions
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index 4acd20a75..d5984009e 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -387,6 +387,11 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@)
/**end repeat1**/
#endif
+#if @fp_only@
+SIMD_IMPL_INTRIN_2(max_@sfx@, v@sfx@, v@sfx@, v@sfx@)
+SIMD_IMPL_INTRIN_2(maxp_@sfx@, v@sfx@, v@sfx@, v@sfx@)
+#endif
+
/***************************
* Mask operations
***************************/
@@ -607,6 +612,11 @@ SIMD_INTRIN_DEF(@intrin@_@sfx@)
/**end repeat1**/
#endif
+#if @fp_only@
+SIMD_INTRIN_DEF(max_@sfx@)
+SIMD_INTRIN_DEF(maxp_@sfx@)
+#endif
+
/***************************
* Mask operations
***************************/
diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h
index b3eba6f5f..84ab024d0 100644
--- a/numpy/core/src/common/simd/avx2/math.h
+++ b/numpy/core/src/common/simd/avx2/math.h
@@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
{ return _mm256_mul_pd(a, a); }
+// Maximum, mapped directly to the native intrinsic, with no guarantees about NaN handling.
+#define npyv_max_f32 _mm256_max_ps
+#define npyv_max_f64 _mm256_max_pd
+// Maximum, following IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is set.
+// - NaN is set only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+ __m256 nn = _mm256_cmp_ps(b, b, _CMP_ORD_Q);
+ __m256 max = _mm256_max_ps(a, b);
+ return _mm256_blendv_ps(a, max, nn);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+ __m256d nn = _mm256_cmp_pd(b, b, _CMP_ORD_Q);
+ __m256d max = _mm256_max_pd(a, b);
+ return _mm256_blendv_pd(a, max, nn);
+}
+
#endif
diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h
index 1db710670..668362298 100644
--- a/numpy/core/src/common/simd/avx512/math.h
+++ b/numpy/core/src/common/simd/avx512/math.h
@@ -46,4 +46,21 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
{ return _mm512_mul_pd(a, a); }
+// Maximum, mapped directly to the native intrinsic, with no guarantees about NaN handling.
+#define npyv_max_f32 _mm512_max_ps
+#define npyv_max_f64 _mm512_max_pd
+// Maximum, following IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is set.
+// - NaN is set only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+ __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q);
+ return _mm512_mask_max_ps(a, nn, a, b);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+ __mmask8 nn = _mm512_cmp_pd_mask(b, b, _CMP_ORD_Q);
+ return _mm512_mask_max_pd(a, nn, a, b);
+}
+
#endif
diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h
index a2bbdf2a5..09b7bbc9e 100644
--- a/numpy/core/src/common/simd/neon/math.h
+++ b/numpy/core/src/common/simd/neon/math.h
@@ -83,4 +83,24 @@ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a)
}
#endif // NPY_SIMD_F64
+// Maximum, mapped directly to the native intrinsic, with no guarantees about NaN handling.
+#define npyv_max_f32 vmaxq_f32
+#define npyv_max_f64 vmaxq_f64
+// Maximum, following IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is set.
+// - NaN is set only if both corresponding elements are NaN.
+#ifdef NPY_HAVE_ASIMD
+ #define npyv_maxp_f32 vmaxnmq_f32
+#else
+ NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+ {
+ npyv_u32 nn_a = vceqq_f32(a, a);
+ npyv_u32 nn_b = vceqq_f32(b, b);
+ return vmaxq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a));
+ }
+#endif
+#if NPY_SIMD_F64
+ #define npyv_maxp_f64 vmaxnmq_f64
+#endif // NPY_SIMD_F64
+
#endif // _NPY_SIMD_SSE_MATH_H
diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h
index b7203cd89..15e9f7e44 100644
--- a/numpy/core/src/common/simd/sse/math.h
+++ b/numpy/core/src/common/simd/sse/math.h
@@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
{ return _mm_mul_pd(a, a); }
+// Maximum, mapped directly to the native intrinsic, with no guarantees about NaN handling.
+#define npyv_max_f32 _mm_max_ps
+#define npyv_max_f64 _mm_max_pd
+// Maximum, following IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is set.
+// - NaN is set only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+ __m128 nn = _mm_cmpord_ps(b, b);
+ __m128 max = _mm_max_ps(a, b);
+ return npyv_select_f32(_mm_castps_si128(nn), max, a);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+ __m128d nn = _mm_cmpord_pd(b, b);
+ __m128d max = _mm_max_pd(a, b);
+ return npyv_select_f64(_mm_castpd_si128(nn), max, a);
+}
+
#endif
diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h
index 7c8610b19..11bacd703 100644
--- a/numpy/core/src/common/simd/vsx/math.h
+++ b/numpy/core/src/common/simd/vsx/math.h
@@ -33,4 +33,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
{ return vec_mul(a, a); }
+// Maximum, mapped directly to the native intrinsic, with no guarantees about NaN handling.
+#define npyv_max_f32 vec_max
+#define npyv_max_f64 vec_max
+// Maximum, following IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is set.
+// - NaN is set only if both corresponding elements are NaN.
+#define npyv_maxp_f32 vec_max
+#define npyv_maxp_f64 vec_max
+
#endif // _NPY_SIMD_VSX_MATH_H
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 8306a86d3..fb7dd88cf 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -315,6 +315,36 @@ class _SIMD_FP(_Test_Utility):
data_square = [x*x for x in data]
square = self.square(vdata)
assert square == data_square
+
+ def test_max(self):
+ """
+ Test intrinsics:
+ npyv_max_##SFX
+ npyv_maxp_##SFX
+ """
+ data_a = self._data()
+ data_b = self._data(self.nlanes)
+ vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+ data_max = [max(a, b) for a, b in zip(data_a, data_b)]
+ _max = self.max(vdata_a, vdata_b)
+ assert _max == data_max
+ maxp = self.maxp(vdata_a, vdata_b)
+ assert maxp == data_max
+ # test IEEE standards
+ pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
+ max_cases = ((nan, nan, nan), (nan, 10, 10), (10, nan, 10),
+ (pinf, pinf, pinf), (pinf, 10, pinf), (10, pinf, pinf),
+ (ninf, ninf, ninf), (ninf, 10, 10), (10, ninf, 10))
+ for case_operand1, case_operand2, desired in max_cases:
+ data_max = [desired]*self.nlanes
+ vdata_a = self.setall(case_operand1)
+ vdata_b = self.setall(case_operand2)
+ maxp = self.maxp(vdata_a, vdata_b)
+ assert maxp == pytest.approx(data_max, nan_ok=True)
+ if nan in (case_operand1, case_operand2, desired):
+ continue
+ _max = self.max(vdata_a, vdata_b)
+ assert _max == data_max
def test_reciprocal(self):
pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()