summaryrefslogtreecommitdiff
path: root/numpy/distutils/checks
diff options
context:
space:
mode:
authorSayed Adel <seiko@imavr.com>2020-06-13 18:15:25 +0200
committerSayed Adel <seiko@imavr.com>2020-06-15 22:48:29 +0200
commitda21d28ef69e65c5bfef8dc22840fe16fec52540 (patch)
treedcff74da7f8712f0328d3c1ed49527a0fd6f131d /numpy/distutils/checks
parent5345c2575a28fa2dfbbec83c99636669476c2745 (diff)
downloadnumpy-da21d28ef69e65c5bfef8dc22840fe16fec52540.tar.gz
ENH: [1/7] enable multi-platform SIMD compiler optimizations
Implement new distutils class `CCompilerOpt`, used for handling the CPU/hardware optimization, starting from parsing the command arguments, to managing the relationship between the CPU baseline and dispatch-able features, also generating the required C headers and ending with compiling the sources with proper compiler's flags. `CCompilerOpt` mainly used as a helper class for `CCompiler`, and doesn't provide any runtime detection for the CPU features, instead only focuses on the compiler side, but it generates abstract C headers that can be used later for the final runtime dispatching process.
Diffstat (limited to 'numpy/distutils/checks')
-rw-r--r--numpy/distutils/checks/cpu_asimd.c25
-rw-r--r--numpy/distutils/checks/cpu_asimddp.c15
-rw-r--r--numpy/distutils/checks/cpu_asimdfhm.c17
-rw-r--r--numpy/distutils/checks/cpu_asimdhp.c14
-rw-r--r--numpy/distutils/checks/cpu_avx.c7
-rw-r--r--numpy/distutils/checks/cpu_avx2.c7
-rw-r--r--numpy/distutils/checks/cpu_avx512_clx.c8
-rw-r--r--numpy/distutils/checks/cpu_avx512_cnl.c10
-rw-r--r--numpy/distutils/checks/cpu_avx512_icl.c12
-rw-r--r--numpy/distutils/checks/cpu_avx512_knl.c11
-rw-r--r--numpy/distutils/checks/cpu_avx512_knm.c17
-rw-r--r--numpy/distutils/checks/cpu_avx512_skx.c12
-rw-r--r--numpy/distutils/checks/cpu_avx512cd.c7
-rw-r--r--numpy/distutils/checks/cpu_avx512f.c7
-rw-r--r--numpy/distutils/checks/cpu_f16c.c9
-rw-r--r--numpy/distutils/checks/cpu_fma3.c8
-rw-r--r--numpy/distutils/checks/cpu_fma4.c12
-rw-r--r--numpy/distutils/checks/cpu_neon.c15
-rw-r--r--numpy/distutils/checks/cpu_neon_fp16.c11
-rw-r--r--numpy/distutils/checks/cpu_neon_vfpv4.c19
-rw-r--r--numpy/distutils/checks/cpu_popcnt.c23
-rw-r--r--numpy/distutils/checks/cpu_sse.c7
-rw-r--r--numpy/distutils/checks/cpu_sse2.c7
-rw-r--r--numpy/distutils/checks/cpu_sse3.c7
-rw-r--r--numpy/distutils/checks/cpu_sse41.c7
-rw-r--r--numpy/distutils/checks/cpu_sse42.c7
-rw-r--r--numpy/distutils/checks/cpu_ssse3.c7
-rw-r--r--numpy/distutils/checks/cpu_vsx.c21
-rw-r--r--numpy/distutils/checks/cpu_vsx2.c13
-rw-r--r--numpy/distutils/checks/cpu_vsx3.c13
-rw-r--r--numpy/distutils/checks/cpu_xop.c12
-rw-r--r--numpy/distutils/checks/test_flags.c1
32 files changed, 368 insertions, 0 deletions
diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c
new file mode 100644
index 000000000..8df556b6c
--- /dev/null
+++ b/numpy/distutils/checks/cpu_asimd.c
@@ -0,0 +1,25 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ /* MAXMIN */
+ int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
+ ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
+ /* ROUNDING */
+ ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
+#ifdef __aarch64__
+ {
+ float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ /* MAXMIN */
+ ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
+ ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
+ /* ROUNDING */
+ ret += (int)vgetq_lane_f64(vrndq_f64(vd1), 0);
+ }
+#endif
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c
new file mode 100644
index 000000000..0158d1354
--- /dev/null
+++ b/numpy/distutils/checks/cpu_asimddp.c
@@ -0,0 +1,15 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2);
+ uint32x4_t va = vdupq_n_u32(3);
+ int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
+#ifdef __aarch64__
+ ret += (int)vgetq_lane_u32(vdotq_laneq_u32(va, v1, v2, 0), 0);
+#endif
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c
new file mode 100644
index 000000000..bb437aa40
--- /dev/null
+++ b/numpy/distutils/checks/cpu_asimdfhm.c
@@ -0,0 +1,17 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ float16x8_t vhp = vdupq_n_f16((float16_t)1);
+ float16x4_t vlhp = vdup_n_f16((float16_t)1);
+ float32x4_t vf = vdupq_n_f32(1.0f);
+ float32x2_t vlf = vdup_n_f32(1.0f);
+
+ int ret = (int)vget_lane_f32(vfmlal_low_u32(vlf, vlhp, vlhp), 0);
+ ret += (int)vgetq_lane_f32(vfmlslq_high_u32(vf, vhp, vhp), 0);
+
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c
new file mode 100644
index 000000000..80b94000f
--- /dev/null
+++ b/numpy/distutils/checks/cpu_asimdhp.c
@@ -0,0 +1,14 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ float16x8_t vhp = vdupq_n_f16((float16_t)-1);
+ float16x4_t vlhp = vdup_n_f16((float16_t)-1);
+
+ int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
+ ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_avx.c b/numpy/distutils/checks/cpu_avx.c
new file mode 100644
index 000000000..737c0d2e9
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx.c
@@ -0,0 +1,7 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ __m256 a = _mm256_add_ps(_mm256_setzero_ps(), _mm256_setzero_ps());
+ return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx2.c b/numpy/distutils/checks/cpu_avx2.c
new file mode 100644
index 000000000..dfb11fd79
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx2.c
@@ -0,0 +1,7 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ __m256i a = _mm256_abs_epi16(_mm256_setzero_si256());
+ return _mm_cvtsi128_si32(_mm256_castsi256_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512_clx.c b/numpy/distutils/checks/cpu_avx512_clx.c
new file mode 100644
index 000000000..71dad83a7
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_clx.c
@@ -0,0 +1,8 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ /* VNNI */
+ __m512i a = _mm512_dpbusd_epi32(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512());
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512_cnl.c b/numpy/distutils/checks/cpu_avx512_cnl.c
new file mode 100644
index 000000000..dfab4436d
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_cnl.c
@@ -0,0 +1,10 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ /* IFMA */
+ __m512i a = _mm512_madd52hi_epu64(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512());
+ /* VMBI */
+ a = _mm512_permutex2var_epi8(a, _mm512_setzero_si512(), _mm512_setzero_si512());
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512_icl.c b/numpy/distutils/checks/cpu_avx512_icl.c
new file mode 100644
index 000000000..cf2706b3b
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_icl.c
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ /* VBMI2 */
+ __m512i a = _mm512_shrdv_epi64(_mm512_setzero_si512(), _mm512_setzero_si512(), _mm512_setzero_si512());
+ /* BITLAG */
+ a = _mm512_popcnt_epi8(a);
+ /* VPOPCNTDQ */
+ a = _mm512_popcnt_epi64(a);
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512_knl.c b/numpy/distutils/checks/cpu_avx512_knl.c
new file mode 100644
index 000000000..0699f37a6
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_knl.c
@@ -0,0 +1,11 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ int base[128];
+ /* ER */
+ __m512i a = _mm512_castpd_si512(_mm512_exp2a23_pd(_mm512_setzero_pd()));
+ /* PF */
+ _mm512_mask_prefetch_i64scatter_pd(base, _mm512_cmpeq_epi64_mask(a, a), a, 1, _MM_HINT_T1);
+ return base[0];
+}
diff --git a/numpy/distutils/checks/cpu_avx512_knm.c b/numpy/distutils/checks/cpu_avx512_knm.c
new file mode 100644
index 000000000..db61b4bfa
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_knm.c
@@ -0,0 +1,17 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ __m512i a = _mm512_setzero_si512();
+ __m512 b = _mm512_setzero_ps();
+
+ /* 4FMAPS */
+ b = _mm512_4fmadd_ps(b, b, b, b, b, NULL);
+ /* 4VNNIW */
+ a = _mm512_4dpwssd_epi32(a, a, a, a, a, NULL);
+ /* VPOPCNTDQ */
+ a = _mm512_popcnt_epi64(a);
+
+ a = _mm512_add_epi32(a, _mm512_castps_si512(b));
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512_skx.c b/numpy/distutils/checks/cpu_avx512_skx.c
new file mode 100644
index 000000000..1d5e15b5e
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512_skx.c
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ /* VL */
+ __m256i a = _mm256_abs_epi64(_mm256_setzero_si256());
+ /* DQ */
+ __m512i b = _mm512_broadcast_i32x8(a);
+ /* BW */
+ b = _mm512_abs_epi16(b);
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(b));
+}
diff --git a/numpy/distutils/checks/cpu_avx512cd.c b/numpy/distutils/checks/cpu_avx512cd.c
new file mode 100644
index 000000000..61bef6b82
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512cd.c
@@ -0,0 +1,7 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ __m512i a = _mm512_lzcnt_epi32(_mm512_setzero_si512());
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_avx512f.c b/numpy/distutils/checks/cpu_avx512f.c
new file mode 100644
index 000000000..f60cc09dd
--- /dev/null
+++ b/numpy/distutils/checks/cpu_avx512f.c
@@ -0,0 +1,7 @@
+#include <immintrin.h>
+
+int main(void)
+{
+ __m512i a = _mm512_abs_epi32(_mm512_setzero_si512());
+ return _mm_cvtsi128_si32(_mm512_castsi512_si128(a));
+}
diff --git a/numpy/distutils/checks/cpu_f16c.c b/numpy/distutils/checks/cpu_f16c.c
new file mode 100644
index 000000000..a5a343e2d
--- /dev/null
+++ b/numpy/distutils/checks/cpu_f16c.c
@@ -0,0 +1,9 @@
+#include <emmintrin.h>
+#include <immintrin.h>
+
+int main(void)
+{
+ __m128 a = _mm_cvtph_ps(_mm_setzero_si128());
+ __m256 a8 = _mm256_cvtph_ps(_mm_setzero_si128());
+ return (int)(_mm_cvtss_f32(a) + _mm_cvtss_f32(_mm256_castps256_ps128(a8)));
+}
diff --git a/numpy/distutils/checks/cpu_fma3.c b/numpy/distutils/checks/cpu_fma3.c
new file mode 100644
index 000000000..cf34c6cb1
--- /dev/null
+++ b/numpy/distutils/checks/cpu_fma3.c
@@ -0,0 +1,8 @@
+#include <xmmintrin.h>
+#include <immintrin.h>
+
+int main(void)
+{
+ __m256 a = _mm256_fmadd_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _mm256_setzero_ps());
+ return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/numpy/distutils/checks/cpu_fma4.c b/numpy/distutils/checks/cpu_fma4.c
new file mode 100644
index 000000000..1ad717033
--- /dev/null
+++ b/numpy/distutils/checks/cpu_fma4.c
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+#ifdef _MSC_VER
+ #include <ammintrin.h>
+#else
+ #include <x86intrin.h>
+#endif
+
+int main(void)
+{
+ __m256 a = _mm256_macc_ps(_mm256_setzero_ps(), _mm256_setzero_ps(), _mm256_setzero_ps());
+ return (int)_mm_cvtss_f32(_mm256_castps256_ps128(a));
+}
diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c
new file mode 100644
index 000000000..4eab1f384
--- /dev/null
+++ b/numpy/distutils/checks/cpu_neon.c
@@ -0,0 +1,15 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
+#ifdef __aarch64__
+ float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
+#endif
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c
new file mode 100644
index 000000000..745d2e793
--- /dev/null
+++ b/numpy/distutils/checks/cpu_neon_fp16.c
@@ -0,0 +1,11 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ short z4[] = {0, 0, 0, 0, 0, 0, 0, 0};
+ float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4));
+ return (int)vgetq_lane_f32(v_z4, 0);
+}
diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c
new file mode 100644
index 000000000..45f7b5d69
--- /dev/null
+++ b/numpy/distutils/checks/cpu_neon_vfpv4.c
@@ -0,0 +1,19 @@
+#ifdef _MSC_VER
+ #include <Intrin.h>
+#endif
+#include <arm_neon.h>
+
+int main(void)
+{
+ float32x4_t v1 = vdupq_n_f32(1.0f);
+ float32x4_t v2 = vdupq_n_f32(2.0f);
+ float32x4_t v3 = vdupq_n_f32(3.0f);
+ int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
+#ifdef __aarch64__
+ float64x2_t vd1 = vdupq_n_f64(1.0);
+ float64x2_t vd2 = vdupq_n_f64(2.0);
+ float64x2_t vd3 = vdupq_n_f64(3.0);
+ ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
+#endif
+ return ret;
+}
diff --git a/numpy/distutils/checks/cpu_popcnt.c b/numpy/distutils/checks/cpu_popcnt.c
new file mode 100644
index 000000000..e6a80fb40
--- /dev/null
+++ b/numpy/distutils/checks/cpu_popcnt.c
@@ -0,0 +1,23 @@
+#ifdef _MSC_VER
+ #include <nmmintrin.h>
+#else
+ #include <popcntintrin.h>
+#endif
+
+int main(void)
+{
+ long long a = 0;
+ int b;
+#ifdef _MSC_VER
+ #ifdef _M_X64
+ a = _mm_popcnt_u64(1);
+ #endif
+ b = _mm_popcnt_u32(1);
+#else
+ #ifdef __x86_64__
+ a = __builtin_popcountll(1);
+ #endif
+ b = __builtin_popcount(1);
+#endif
+ return (int)a + b;
+}
diff --git a/numpy/distutils/checks/cpu_sse.c b/numpy/distutils/checks/cpu_sse.c
new file mode 100644
index 000000000..bb98bf63c
--- /dev/null
+++ b/numpy/distutils/checks/cpu_sse.c
@@ -0,0 +1,7 @@
+#include <xmmintrin.h>
+
+int main(void)
+{
+ __m128 a = _mm_add_ps(_mm_setzero_ps(), _mm_setzero_ps());
+ return (int)_mm_cvtss_f32(a);
+}
diff --git a/numpy/distutils/checks/cpu_sse2.c b/numpy/distutils/checks/cpu_sse2.c
new file mode 100644
index 000000000..658afc9b4
--- /dev/null
+++ b/numpy/distutils/checks/cpu_sse2.c
@@ -0,0 +1,7 @@
+#include <emmintrin.h>
+
+int main(void)
+{
+ __m128i a = _mm_add_epi16(_mm_setzero_si128(), _mm_setzero_si128());
+ return _mm_cvtsi128_si32(a);
+}
diff --git a/numpy/distutils/checks/cpu_sse3.c b/numpy/distutils/checks/cpu_sse3.c
new file mode 100644
index 000000000..aece1e601
--- /dev/null
+++ b/numpy/distutils/checks/cpu_sse3.c
@@ -0,0 +1,7 @@
+#include <pmmintrin.h>
+
+int main(void)
+{
+ __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
+ return (int)_mm_cvtss_f32(a);
+}
diff --git a/numpy/distutils/checks/cpu_sse41.c b/numpy/distutils/checks/cpu_sse41.c
new file mode 100644
index 000000000..bfdb9feac
--- /dev/null
+++ b/numpy/distutils/checks/cpu_sse41.c
@@ -0,0 +1,7 @@
+#include <smmintrin.h>
+
+int main(void)
+{
+ __m128 a = _mm_floor_ps(_mm_setzero_ps());
+ return (int)_mm_cvtss_f32(a);
+}
diff --git a/numpy/distutils/checks/cpu_sse42.c b/numpy/distutils/checks/cpu_sse42.c
new file mode 100644
index 000000000..24f5d93fe
--- /dev/null
+++ b/numpy/distutils/checks/cpu_sse42.c
@@ -0,0 +1,7 @@
+#include <smmintrin.h>
+
+int main(void)
+{
+ __m128 a = _mm_hadd_ps(_mm_setzero_ps(), _mm_setzero_ps());
+ return (int)_mm_cvtss_f32(a);
+}
diff --git a/numpy/distutils/checks/cpu_ssse3.c b/numpy/distutils/checks/cpu_ssse3.c
new file mode 100644
index 000000000..ad0abc1e6
--- /dev/null
+++ b/numpy/distutils/checks/cpu_ssse3.c
@@ -0,0 +1,7 @@
+#include <tmmintrin.h>
+
+int main(void)
+{
+ __m128i a = _mm_hadd_epi16(_mm_setzero_si128(), _mm_setzero_si128());
+ return (int)_mm_cvtsi128_si32(a);
+}
diff --git a/numpy/distutils/checks/cpu_vsx.c b/numpy/distutils/checks/cpu_vsx.c
new file mode 100644
index 000000000..0b3f30d6a
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vsx.c
@@ -0,0 +1,21 @@
+#ifndef __VSX__
+ #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+#if (defined(__GNUC__) && !defined(vec_xl)) || (defined(__clang__) && !defined(__IBMC__))
+ #define vsx_ld vec_vsx_ld
+ #define vsx_st vec_vsx_st
+#else
+ #define vsx_ld vec_xl
+ #define vsx_st vec_xst
+#endif
+
+int main(void)
+{
+ unsigned int zout[4];
+ unsigned int z4[] = {0, 0, 0, 0};
+ __vector unsigned int v_z4 = vsx_ld(0, z4);
+ vsx_st(v_z4, 0, zout);
+ return zout[0];
+}
diff --git a/numpy/distutils/checks/cpu_vsx2.c b/numpy/distutils/checks/cpu_vsx2.c
new file mode 100644
index 000000000..410fb29d6
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vsx2.c
@@ -0,0 +1,13 @@
+#ifndef __VSX__
+ #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned long long v_uint64x2;
+
+int main(void)
+{
+ v_uint64x2 z2 = (v_uint64x2){0, 0};
+ z2 = (v_uint64x2)vec_cmpeq(z2, z2);
+ return (int)vec_extract(z2, 0);
+}
diff --git a/numpy/distutils/checks/cpu_vsx3.c b/numpy/distutils/checks/cpu_vsx3.c
new file mode 100644
index 000000000..857526535
--- /dev/null
+++ b/numpy/distutils/checks/cpu_vsx3.c
@@ -0,0 +1,13 @@
+#ifndef __VSX__
+ #error "VSX is not supported"
+#endif
+#include <altivec.h>
+
+typedef __vector unsigned int v_uint32x4;
+
+int main(void)
+{
+ v_uint32x4 z4 = (v_uint32x4){0, 0, 0, 0};
+ z4 = vec_absd(z4, z4);
+ return (int)vec_extract(z4, 0);
+}
diff --git a/numpy/distutils/checks/cpu_xop.c b/numpy/distutils/checks/cpu_xop.c
new file mode 100644
index 000000000..51d70cf2b
--- /dev/null
+++ b/numpy/distutils/checks/cpu_xop.c
@@ -0,0 +1,12 @@
+#include <immintrin.h>
+#ifdef _MSC_VER
+ #include <ammintrin.h>
+#else
+ #include <x86intrin.h>
+#endif
+
+int main(void)
+{
+ __m128i a = _mm_comge_epu32(_mm_setzero_si128(), _mm_setzero_si128());
+ return _mm_cvtsi128_si32(a);
+}
diff --git a/numpy/distutils/checks/test_flags.c b/numpy/distutils/checks/test_flags.c
new file mode 100644
index 000000000..4cd09d42a
--- /dev/null
+++ b/numpy/distutils/checks/test_flags.c
@@ -0,0 +1 @@
+int test_flags;