8 files changed, 84 insertions, 47 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index befc83c16..2019dcb25 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -955,51 +955,57 @@ class _CCompiler:
     def __init__(self):
         if hasattr(self, "cc_is_cached"):
             return
-        #      attr                regex
+        #      attr            regex        compiler-expression
         detect_arch = (
-            ("cc_on_x64",      ".*(x|x86_|amd)64.*"),
-            ("cc_on_x86",      ".*(win32|x86|i386|i686).*"),
-            ("cc_on_ppc64le",  ".*(powerpc|ppc)64(el|le).*"),
-            ("cc_on_ppc64",    ".*(powerpc|ppc)64.*"),
-            ("cc_on_aarch64",  ".*(aarch64|arm64).*"),
-            ("cc_on_armhf",    ".*arm.*"),
-            ("cc_on_s390x",    ".*s390x.*"),
+            ("cc_on_x64",      ".*(x|x86_|amd)64.*", ""),
+            ("cc_on_x86",      ".*(win32|x86|i386|i686).*", ""),
+            ("cc_on_ppc64le",  ".*(powerpc|ppc)64(el|le).*", ""),
+            ("cc_on_ppc64",    ".*(powerpc|ppc)64.*", ""),
+            ("cc_on_aarch64",  ".*(aarch64|arm64).*", ""),
+            ("cc_on_armhf",    ".*arm.*", "defined(__ARM_ARCH_7__) || "
+                                          "defined(__ARM_ARCH_7A__)"),
+            ("cc_on_s390x",    ".*s390x.*", ""),
             # undefined platform
-            ("cc_on_noarch",    ""),
+            ("cc_on_noarch",   "", ""),
         )
         detect_compiler = (
-            ("cc_is_gcc",     r".*(gcc|gnu\-g).*"),
-            ("cc_is_clang",    ".*clang.*"),
-            ("cc_is_iccw",     ".*(intelw|intelemw|iccw).*"), # intel msvc like
-            ("cc_is_icc",      ".*(intel|icc).*"), # intel unix like
-            ("cc_is_msvc",     ".*msvc.*"),
+            ("cc_is_gcc",     r".*(gcc|gnu\-g).*", ""),
+            ("cc_is_clang",    ".*clang.*", ""),
+            # intel msvc like
+            ("cc_is_iccw",     ".*(intelw|intelemw|iccw).*", ""),
+            ("cc_is_icc",      ".*(intel|icc).*", ""),  # intel unix like
+            ("cc_is_msvc",     ".*msvc.*", ""),
             # undefined compiler will be treat it as gcc
-            ("cc_is_nocc",     ""),
+            ("cc_is_nocc",     "", ""),
         )
         detect_args = (
-           ("cc_has_debug",  ".*(O0|Od|ggdb|coverage|debug:full).*"),
-           ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),
+           ("cc_has_debug",  ".*(O0|Od|ggdb|coverage|debug:full).*", ""),
+           ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*", ""),
            # in case if the class run with -DNPY_DISABLE_OPTIMIZATION
-           ("cc_noopt", ".*DISABLE_OPT.*"),
+           ("cc_noopt", ".*DISABLE_OPT.*", ""),
         )
 
         dist_info = self.dist_info()
         platform, compiler_info, extra_args = dist_info
         # set False to all attrs
         for section in (detect_arch, detect_compiler, detect_args):
-            for attr, rgex in section:
+            for attr, rgex, cexpr in section:
                 setattr(self, attr, False)
 
         for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
-            for attr, rgex in detect:
+            for attr, rgex, cexpr in detect:
                 if rgex and not re.match(rgex, searchin, re.IGNORECASE):
                     continue
+                if cexpr and not self.cc_test_cexpr(cexpr):
+                    continue
                 setattr(self, attr, True)
                 break
 
-        for attr, rgex in detect_args:
+        for attr, rgex, cexpr in detect_args:
             if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
                 continue
+            if cexpr and not self.cc_test_cexpr(cexpr):
+                continue
             setattr(self, attr, True)
 
         if self.cc_on_noarch:
@@ -1071,6 +1077,25 @@ class _CCompiler:
             self.dist_log("testing failed", stderr=True)
         return test
 
+    @_Cache.me
+    def cc_test_cexpr(self, cexpr, flags=None):
+        """
+        Test whether the preprocessor expression `cexpr` holds when compiled.
+        """
+        self.dist_log("testing compiler expression", cexpr)
+        test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
+        with open(test_path, "w") as fd:
+            fd.write(textwrap.dedent(f"""\
+               #if !({cexpr})
+                   #error "unsupported expression"
+               #endif
+               int dummy;
+            """))
+        test = self.dist_test(test_path, flags or [])  # None -> no flags
+        if not test:
+            self.dist_log("testing failed", stderr=True)
+        return test
+
     def cc_normalize_flags(self, flags):
         """
         Remove the conflicts that caused due gathering implied features flags.
diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c
index 8df556b6c..fc408feb0 100644
--- a/numpy/distutils/checks/cpu_asimd.c
+++ b/numpy/distutils/checks/cpu_asimd.c
@@ -3,9 +3,10 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+    float *src = (float*)argv[argc-1];
+    float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
     /* MAXMIN */
     int ret  = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
         ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
@@ -13,7 +14,8 @@ int main(void)
     ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
 #ifdef __aarch64__
     {
-        float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+        double *src2 = (double*)argv[argc-1];
+        float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
         /* MAXMIN */
         ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
         ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c
index 0158d1354..e7068ce02 100644
--- a/numpy/distutils/checks/cpu_asimddp.c
+++ b/numpy/distutils/checks/cpu_asimddp.c
@@ -3,9 +3,10 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2);
+    unsigned char *src = (unsigned char*)argv[argc-1];
+    uint8x16_t v1 = vdupq_n_u8(src[0]), v2 = vdupq_n_u8(src[1]);
     uint32x4_t va = vdupq_n_u32(3);
     int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
 #ifdef __aarch64__
diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c
index cb49751c4..54e328098 100644
--- a/numpy/distutils/checks/cpu_asimdfhm.c
+++ b/numpy/distutils/checks/cpu_asimdfhm.c
@@ -3,12 +3,14 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    float16x8_t vhp  = vdupq_n_f16((float16_t)1);
-    float16x4_t vlhp = vdup_n_f16((float16_t)1);
-    float32x4_t vf   = vdupq_n_f32(1.0f);
-    float32x2_t vlf  = vdup_n_f32(1.0f);
+    float16_t *src = (float16_t*)argv[argc-1];
+    float *src2 = (float*)argv[argc-2];
+    float16x8_t vhp  = vdupq_n_f16(src[0]);
+    float16x4_t vlhp = vdup_n_f16(src[1]);
+    float32x4_t vf   = vdupq_n_f32(src2[0]);
+    float32x2_t vlf  = vdup_n_f32(src2[1]);
 
     int ret  = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0);
         ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0);
diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c
index 80b94000f..e2de0306e 100644
--- a/numpy/distutils/checks/cpu_asimdhp.c
+++ b/numpy/distutils/checks/cpu_asimdhp.c
@@ -3,10 +3,11 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    float16x8_t vhp  = vdupq_n_f16((float16_t)-1);
-    float16x4_t vlhp = vdup_n_f16((float16_t)-1);
+    float16_t *src = (float16_t*)argv[argc-1];
+    float16x8_t vhp  = vdupq_n_f16(src[0]);
+    float16x4_t vlhp = vdup_n_f16(src[1]);
 
     int ret  =  (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
         ret  += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c
index 4eab1f384..8c64f864d 100644
--- a/numpy/distutils/checks/cpu_neon.c
+++ b/numpy/distutils/checks/cpu_neon.c
@@ -3,12 +3,16 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+    // Read the operands through argv, which the compiler cannot trace, so
+    // the constants are not folded away and the linker is exercised too.
+    float *src = (float*)argv[argc-1];
+    float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
     int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
 #ifdef __aarch64__
-    float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+    double *src2 = (double*)argv[argc-2];
+    float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
     ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
 #endif
     return ret;
diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c
index 745d2e793..f3b949770 100644
--- a/numpy/distutils/checks/cpu_neon_fp16.c
+++ b/numpy/distutils/checks/cpu_neon_fp16.c
@@ -3,9 +3,9 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    short z4[] = {0, 0, 0, 0, 0, 0, 0, 0};
-    float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4));
+    short *src = (short*)argv[argc-1];
+    float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
     return (int)vgetq_lane_f32(v_z4, 0);
 }
diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c
index 45f7b5d69..a039159dd 100644
--- a/numpy/distutils/checks/cpu_neon_vfpv4.c
+++ b/numpy/distutils/checks/cpu_neon_vfpv4.c
@@ -3,16 +3,18 @@
 #endif
 #include <arm_neon.h>
 
-int main(void)
+int main(int argc, char **argv)
 {
-    float32x4_t v1 = vdupq_n_f32(1.0f);
-    float32x4_t v2 = vdupq_n_f32(2.0f);
-    float32x4_t v3 = vdupq_n_f32(3.0f);
+    float *src = (float*)argv[argc-1];
+    float32x4_t v1 = vdupq_n_f32(src[0]);
+    float32x4_t v2 = vdupq_n_f32(src[1]);
+    float32x4_t v3 = vdupq_n_f32(src[2]);
     int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
 #ifdef __aarch64__
-    float64x2_t vd1 = vdupq_n_f64(1.0);
-    float64x2_t vd2 = vdupq_n_f64(2.0);
-    float64x2_t vd3 = vdupq_n_f64(3.0);
+    double *src2 = (double*)argv[argc-2];
+    float64x2_t vd1 = vdupq_n_f64(src2[0]);
+    float64x2_t vd2 = vdupq_n_f64(src2[1]);
+    float64x2_t vd3 = vdupq_n_f64(src2[2]);
     ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
 #endif
     return ret;