summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/distutils/ccompiler_opt.py67
-rw-r--r--numpy/distutils/checks/cpu_asimd.c8
-rw-r--r--numpy/distutils/checks/cpu_asimddp.c5
-rw-r--r--numpy/distutils/checks/cpu_asimdfhm.c12
-rw-r--r--numpy/distutils/checks/cpu_asimdhp.c7
-rw-r--r--numpy/distutils/checks/cpu_neon.c10
-rw-r--r--numpy/distutils/checks/cpu_neon_fp16.c6
-rw-r--r--numpy/distutils/checks/cpu_neon_vfpv4.c16
8 files changed, 84 insertions, 47 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py
index befc83c16..2019dcb25 100644
--- a/numpy/distutils/ccompiler_opt.py
+++ b/numpy/distutils/ccompiler_opt.py
@@ -955,51 +955,57 @@ class _CCompiler:
def __init__(self):
if hasattr(self, "cc_is_cached"):
return
- # attr regex
+ # attr regex compiler-expression
detect_arch = (
- ("cc_on_x64", ".*(x|x86_|amd)64.*"),
- ("cc_on_x86", ".*(win32|x86|i386|i686).*"),
- ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"),
- ("cc_on_ppc64", ".*(powerpc|ppc)64.*"),
- ("cc_on_aarch64", ".*(aarch64|arm64).*"),
- ("cc_on_armhf", ".*arm.*"),
- ("cc_on_s390x", ".*s390x.*"),
+ ("cc_on_x64", ".*(x|x86_|amd)64.*", ""),
+ ("cc_on_x86", ".*(win32|x86|i386|i686).*", ""),
+ ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*", ""),
+ ("cc_on_ppc64", ".*(powerpc|ppc)64.*", ""),
+ ("cc_on_aarch64", ".*(aarch64|arm64).*", ""),
+ ("cc_on_armhf", ".*arm.*", "defined(__ARM_ARCH_7__) || "
+ "defined(__ARM_ARCH_7A__)"),
+ ("cc_on_s390x", ".*s390x.*", ""),
# undefined platform
- ("cc_on_noarch", ""),
+ ("cc_on_noarch", "", ""),
)
detect_compiler = (
- ("cc_is_gcc", r".*(gcc|gnu\-g).*"),
- ("cc_is_clang", ".*clang.*"),
- ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like
- ("cc_is_icc", ".*(intel|icc).*"), # intel unix like
- ("cc_is_msvc", ".*msvc.*"),
+ ("cc_is_gcc", r".*(gcc|gnu\-g).*", ""),
+ ("cc_is_clang", ".*clang.*", ""),
+ # intel msvc like
+ ("cc_is_iccw", ".*(intelw|intelemw|iccw).*", ""),
+ ("cc_is_icc", ".*(intel|icc).*", ""), # intel unix like
+ ("cc_is_msvc", ".*msvc.*", ""),
# undefined compiler will be treat it as gcc
- ("cc_is_nocc", ""),
+ ("cc_is_nocc", "", ""),
)
detect_args = (
- ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"),
- ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"),
+ ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*", ""),
+ ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*", ""),
# in case if the class run with -DNPY_DISABLE_OPTIMIZATION
- ("cc_noopt", ".*DISABLE_OPT.*"),
+ ("cc_noopt", ".*DISABLE_OPT.*", ""),
)
dist_info = self.dist_info()
platform, compiler_info, extra_args = dist_info
# set False to all attrs
for section in (detect_arch, detect_compiler, detect_args):
- for attr, rgex in section:
+ for attr, rgex, cexpr in section:
setattr(self, attr, False)
for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)):
- for attr, rgex in detect:
+ for attr, rgex, cexpr in detect:
if rgex and not re.match(rgex, searchin, re.IGNORECASE):
continue
+ if cexpr and not self.cc_test_cexpr(cexpr):
+ continue
setattr(self, attr, True)
break
- for attr, rgex in detect_args:
+ for attr, rgex, cexpr in detect_args:
if rgex and not re.match(rgex, extra_args, re.IGNORECASE):
continue
+ if cexpr and not self.cc_test_cexpr(cexpr):
+ continue
setattr(self, attr, True)
if self.cc_on_noarch:
@@ -1071,6 +1077,25 @@ class _CCompiler:
self.dist_log("testing failed", stderr=True)
return test
+    @_Cache.me
+    def cc_test_cexpr(self, cexpr, flags=None):
+        """
+        Test whether a C preprocessor expression holds for the compiler.
+
+        Writes a small C source that triggers ``#error`` unless `cexpr`
+        evaluates to true, then hands that source to `dist_test()`.
+
+        Parameters
+        ----------
+        cexpr : str
+            Expression placed inside ``#if !(...)``.
+        flags : list of str, optional
+            Extra compiler flags forwarded to `dist_test()`.
+            Defaults to no flags.
+
+        Returns
+        -------
+        The result of `dist_test()`, truthy when the test passed.
+        """
+        # `None` replaces a mutable `[]` default, which would be one list
+        # object shared by every call.
+        if flags is None:
+            flags = []
+        self.dist_log("testing compiler expression", cexpr)
+        test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c")
+        with open(test_path, "w") as fd:
+            # the compile only succeeds when the expression is true
+            fd.write(textwrap.dedent(f"""\
+            #if !({cexpr})
+            #error "unsupported expression"
+            #endif
+            int dummy;
+            """))
+        test = self.dist_test(test_path, flags)
+        if not test:
+            self.dist_log("testing failed", stderr=True)
+        return test
+
def cc_normalize_flags(self, flags):
"""
Remove the conflicts that caused due gathering implied features flags.
diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c
index 8df556b6c..fc408feb0 100644
--- a/numpy/distutils/checks/cpu_asimd.c
+++ b/numpy/distutils/checks/cpu_asimd.c
@@ -3,9 +3,10 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
/* MAXMIN */
int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0);
ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0);
@@ -13,7 +14,8 @@ int main(void)
ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0);
#ifdef __aarch64__
{
- float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ double *src2 = (double*)argv[argc-1]; /* cast must match the double* declaration */
+ float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
/* MAXMIN */
ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0);
ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0);
diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c
index 0158d1354..e7068ce02 100644
--- a/numpy/distutils/checks/cpu_asimddp.c
+++ b/numpy/distutils/checks/cpu_asimddp.c
@@ -3,9 +3,10 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2);
+ unsigned char *src = (unsigned char*)argv[argc-1];
+ uint8x16_t v1 = vdupq_n_u8(src[0]), v2 = vdupq_n_u8(src[1]);
uint32x4_t va = vdupq_n_u32(3);
int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0);
#ifdef __aarch64__
diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c
index cb49751c4..54e328098 100644
--- a/numpy/distutils/checks/cpu_asimdfhm.c
+++ b/numpy/distutils/checks/cpu_asimdfhm.c
@@ -3,12 +3,14 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float16x8_t vhp = vdupq_n_f16((float16_t)1);
- float16x4_t vlhp = vdup_n_f16((float16_t)1);
- float32x4_t vf = vdupq_n_f32(1.0f);
- float32x2_t vlf = vdup_n_f32(1.0f);
+ float16_t *src = (float16_t*)argv[argc-1];
+ float *src2 = (float*)argv[argc-2];
+ float16x8_t vhp = vdupq_n_f16(src[0]);
+ float16x4_t vlhp = vdup_n_f16(src[1]);
+ float32x4_t vf = vdupq_n_f32(src2[0]);
+ float32x2_t vlf = vdup_n_f32(src2[1]);
int ret = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0);
ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0);
diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c
index 80b94000f..e2de0306e 100644
--- a/numpy/distutils/checks/cpu_asimdhp.c
+++ b/numpy/distutils/checks/cpu_asimdhp.c
@@ -3,10 +3,11 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float16x8_t vhp = vdupq_n_f16((float16_t)-1);
- float16x4_t vlhp = vdup_n_f16((float16_t)-1);
+ float16_t *src = (float16_t*)argv[argc-1];
+ float16x8_t vhp = vdupq_n_f16(src[0]);
+ float16x4_t vlhp = vdup_n_f16(src[1]);
int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0);
ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0);
diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c
index 4eab1f384..8c64f864d 100644
--- a/numpy/distutils/checks/cpu_neon.c
+++ b/numpy/distutils/checks/cpu_neon.c
@@ -3,12 +3,16 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f);
+ // passing from untraced pointers to avoid optimizing out any constants
+ // so we can test against the linker.
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]);
int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0);
#ifdef __aarch64__
- float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0);
+ double *src2 = (double*)argv[argc-2];
+ float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]);
ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0);
#endif
return ret;
diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c
index 745d2e793..f3b949770 100644
--- a/numpy/distutils/checks/cpu_neon_fp16.c
+++ b/numpy/distutils/checks/cpu_neon_fp16.c
@@ -3,9 +3,9 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- short z4[] = {0, 0, 0, 0, 0, 0, 0, 0};
- float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4));
+ short *src = (short*)argv[argc-1];
+ float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src));
return (int)vgetq_lane_f32(v_z4, 0);
}
diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c
index 45f7b5d69..a039159dd 100644
--- a/numpy/distutils/checks/cpu_neon_vfpv4.c
+++ b/numpy/distutils/checks/cpu_neon_vfpv4.c
@@ -3,16 +3,18 @@
#endif
#include <arm_neon.h>
-int main(void)
+int main(int argc, char **argv)
{
- float32x4_t v1 = vdupq_n_f32(1.0f);
- float32x4_t v2 = vdupq_n_f32(2.0f);
- float32x4_t v3 = vdupq_n_f32(3.0f);
+ float *src = (float*)argv[argc-1];
+ float32x4_t v1 = vdupq_n_f32(src[0]);
+ float32x4_t v2 = vdupq_n_f32(src[1]);
+ float32x4_t v3 = vdupq_n_f32(src[2]);
int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0);
#ifdef __aarch64__
- float64x2_t vd1 = vdupq_n_f64(1.0);
- float64x2_t vd2 = vdupq_n_f64(2.0);
- float64x2_t vd3 = vdupq_n_f64(3.0);
+ double *src2 = (double*)argv[argc-2];
+ float64x2_t vd1 = vdupq_n_f64(src2[0]);
+ float64x2_t vd2 = vdupq_n_f64(src2[1]);
+ float64x2_t vd3 = vdupq_n_f64(src2[2]);
ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0);
#endif
return ret;