ENH: Add max values comparison for floating point

SIMD intrinsics for computing the element-wise maximum of double/single precision vectors have been added. Implementations are provided for SSE, AVX2, AVX512, NEON and VSX.
author: HowJmay <yuanyanghau@gmail.com> 2021-04-28 01:35:59 +0800
committer: HowJmay <yuanyanghau@gmail.com> 2021-05-01 01:40:02 +0800
commit: 18cb9b43e846ff1b42ce0b19d8909bd1be03773e (patch)
tree: 18a572aeeee88e418d108f66faafb46908a3a159 /numpy
parent: 967274c4cb8b67d4ecc0fbe2f0e17359cc6e1c97 (diff)
download: numpy-18cb9b43e846ff1b42ce0b19d8909bd1be03773e.tar.gz
7 files changed, 124 insertions, 0 deletions
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index 4acd20a75..d5984009e 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -387,6 +387,11 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, v@sfx@)
 /**end repeat1**/
 #endif
 
+#if @fp_only@
+SIMD_IMPL_INTRIN_2(max_@sfx@, v@sfx@, v@sfx@, v@sfx@)
+SIMD_IMPL_INTRIN_2(maxp_@sfx@, v@sfx@, v@sfx@, v@sfx@)
+#endif
+
 /***************************
  * Mask operations
  ***************************/
@@ -607,6 +612,11 @@ SIMD_INTRIN_DEF(@intrin@_@sfx@)
 /**end repeat1**/
 #endif
 
+#if @fp_only@
+SIMD_INTRIN_DEF(max_@sfx@)
+SIMD_INTRIN_DEF(maxp_@sfx@)
+#endif
+
 /***************************
  * Mask operations
  ***************************/
diff --git a/numpy/core/src/common/simd/avx2/math.h b/numpy/core/src/common/simd/avx2/math.h
index b3eba6f5f..84ab024d0 100644
--- a/numpy/core/src/common/simd/avx2/math.h
+++ b/numpy/core/src/common/simd/avx2/math.h
@@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
 NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
 { return _mm256_mul_pd(a, a); }
 
+// Maximum, mapped directly to the native instruction; NaN handling is not guaranteed.
+#define npyv_max_f32 _mm256_max_ps
+#define npyv_max_f64 _mm256_max_pd
+// Maximum, supports IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is returned.
+// - NaN is returned only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+    __m256 nn  = _mm256_cmp_ps(b, b, _CMP_ORD_Q);
+    __m256 max = _mm256_max_ps(a, b);
+    return _mm256_blendv_ps(a, max, nn);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+    __m256d nn  = _mm256_cmp_pd(b, b, _CMP_ORD_Q);
+    __m256d max = _mm256_max_pd(a, b);
+    return _mm256_blendv_pd(a, max, nn);
+}
+
 #endif
diff --git a/numpy/core/src/common/simd/avx512/math.h b/numpy/core/src/common/simd/avx512/math.h
index 1db710670..668362298 100644
--- a/numpy/core/src/common/simd/avx512/math.h
+++ b/numpy/core/src/common/simd/avx512/math.h
@@ -46,4 +46,21 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
 NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
 { return _mm512_mul_pd(a, a); }
 
+// Maximum, mapped directly to the native instruction; NaN handling is not guaranteed.
+#define npyv_max_f32 _mm512_max_ps
+#define npyv_max_f64 _mm512_max_pd
+// Maximum, supports IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is returned.
+// - NaN is returned only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+    __mmask16 nn = _mm512_cmp_ps_mask(b, b, _CMP_ORD_Q);
+    return _mm512_mask_max_ps(a, nn, a, b);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+    __mmask8 nn = _mm512_cmp_pd_mask(b, b, _CMP_ORD_Q);
+    return _mm512_mask_max_pd(a, nn, a, b);
+}
+
 #endif
diff --git a/numpy/core/src/common/simd/neon/math.h b/numpy/core/src/common/simd/neon/math.h
index a2bbdf2a5..09b7bbc9e 100644
--- a/numpy/core/src/common/simd/neon/math.h
+++ b/numpy/core/src/common/simd/neon/math.h
@@ -83,4 +83,24 @@ NPY_FINLINE npyv_f32 npyv_recip_f32(npyv_f32 a)
     }
 #endif // NPY_SIMD_F64
 
+// Maximum, mapped directly to the native instruction; NaN handling is not guaranteed.
+#define npyv_max_f32 vmaxq_f32
+#define npyv_max_f64 vmaxq_f64
+// Maximum, supports IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is returned.
+// - NaN is returned only if both corresponding elements are NaN.
+#ifdef NPY_HAVE_ASIMD
+    #define npyv_maxp_f32 vmaxnmq_f32
+#else
+    NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+    { 
+        npyv_u32 nn_a = vceqq_f32(a, a);
+        npyv_u32 nn_b = vceqq_f32(b, b);
+        return vmaxq_f32(vbslq_f32(nn_a, a, b), vbslq_f32(nn_b, b, a));
+    } 
+#endif
+#if NPY_SIMD_F64
+    #define npyv_maxp_f64 vmaxnmq_f64
+#endif // NPY_SIMD_F64
+
 #endif // _NPY_SIMD_SSE_MATH_H
diff --git a/numpy/core/src/common/simd/sse/math.h b/numpy/core/src/common/simd/sse/math.h
index b7203cd89..15e9f7e44 100644
--- a/numpy/core/src/common/simd/sse/math.h
+++ b/numpy/core/src/common/simd/sse/math.h
@@ -37,4 +37,23 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
 NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
 { return _mm_mul_pd(a, a); }
 
+// Maximum, mapped directly to the native instruction; NaN handling is not guaranteed.
+#define npyv_max_f32 _mm_max_ps
+#define npyv_max_f64 _mm_max_pd
+// Maximum, supports IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is returned.
+// - NaN is returned only if both corresponding elements are NaN.
+NPY_FINLINE npyv_f32 npyv_maxp_f32(npyv_f32 a, npyv_f32 b)
+{
+    __m128 nn  = _mm_cmpord_ps(b, b);
+    __m128 max = _mm_max_ps(a, b);
+    return npyv_select_f32(_mm_castps_si128(nn), max, a);
+}
+NPY_FINLINE npyv_f64 npyv_maxp_f64(npyv_f64 a, npyv_f64 b)
+{
+    __m128d nn  = _mm_cmpord_pd(b, b);
+    __m128d max = _mm_max_pd(a, b);
+    return npyv_select_f64(_mm_castpd_si128(nn), max, a);
+}
+
 #endif
diff --git a/numpy/core/src/common/simd/vsx/math.h b/numpy/core/src/common/simd/vsx/math.h
index 7c8610b19..11bacd703 100644
--- a/numpy/core/src/common/simd/vsx/math.h
+++ b/numpy/core/src/common/simd/vsx/math.h
@@ -33,4 +33,13 @@ NPY_FINLINE npyv_f32 npyv_square_f32(npyv_f32 a)
 NPY_FINLINE npyv_f64 npyv_square_f64(npyv_f64 a)
 { return vec_mul(a, a); }
 
+// Maximum, mapped directly to the native instruction; NaN handling is not guaranteed.
+#define npyv_max_f32 vec_max
+#define npyv_max_f64 vec_max
+// Maximum, supports IEEE floating-point arithmetic (IEC 60559):
+// - If only one of the two corresponding elements is NaN, the element of the other vector is returned.
+// - NaN is returned only if both corresponding elements are NaN.
+#define npyv_maxp_f32 vec_max
+#define npyv_maxp_f64 vec_max
+
 #endif // _NPY_SIMD_VSX_MATH_H
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 8306a86d3..fb7dd88cf 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -315,6 +315,36 @@ class _SIMD_FP(_Test_Utility):
         data_square = [x*x for x in data]
         square = self.square(vdata)
         assert square == data_square
+        
+    def test_max(self):
+        """
+        Test intrinsics:
+            npyv_max_##SFX
+            npyv_maxp_##SFX
+        """
+        data_a = self._data()
+        data_b = self._data(self.nlanes)
+        vdata_a, vdata_b = self.load(data_a), self.load(data_b)
+        data_max = [max(a, b) for a, b in zip(data_a, data_b)]
+        _max = self.max(vdata_a, vdata_b)
+        assert _max == data_max
+        maxp = self.maxp(vdata_a, vdata_b)
+        assert maxp == data_max
+        # test IEEE standards
+        pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
+        max_cases = ((nan, nan, nan), (nan, 10, 10), (10, nan, 10),
+                     (pinf, pinf, pinf), (pinf, 10, pinf), (10, pinf, pinf),
+                     (ninf, ninf, ninf), (ninf, 10, 10), (10, ninf, 10))
+        for case_operand1, case_operand2, desired in max_cases:
+            data_max = [desired]*self.nlanes
+            vdata_a = self.setall(case_operand1)
+            vdata_b = self.setall(case_operand2)
+            maxp = self.maxp(vdata_a, vdata_b)
+            assert maxp == pytest.approx(data_max, nan_ok=True)
+            if nan in (case_operand1, case_operand2, desired):
+                continue
+            _max = self.max(vdata_a, vdata_b)
+            assert _max == data_max
 
     def test_reciprocal(self):
         pinf, ninf, nan = self._pinfinity(), self._ninfinity(), self._nan()
author	HowJmay <yuanyanghau@gmail.com>	2021-04-28 01:35:59 +0800
committer	HowJmay <yuanyanghau@gmail.com>	2021-05-01 01:40:02 +0800
commit	18cb9b43e846ff1b42ce0b19d8909bd1be03773e (patch)
tree	18a572aeeee88e418d108f66faafb46908a3a159 /numpy
parent	967274c4cb8b67d4ecc0fbe2f0e17359cc6e1c97 (diff)
download	numpy-18cb9b43e846ff1b42ce0b19d8909bd1be03773e.tar.gz