diff options
Diffstat (limited to 'numpy')
| -rw-r--r-- | numpy/distutils/ccompiler_opt.py | 67 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_asimd.c | 8 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_asimddp.c | 5 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_asimdfhm.c | 12 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_asimdhp.c | 7 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_neon.c | 10 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_neon_fp16.c | 6 | ||||
| -rw-r--r-- | numpy/distutils/checks/cpu_neon_vfpv4.c | 16 |
8 files changed, 84 insertions, 47 deletions
diff --git a/numpy/distutils/ccompiler_opt.py b/numpy/distutils/ccompiler_opt.py index befc83c16..2019dcb25 100644 --- a/numpy/distutils/ccompiler_opt.py +++ b/numpy/distutils/ccompiler_opt.py @@ -955,51 +955,57 @@ class _CCompiler: def __init__(self): if hasattr(self, "cc_is_cached"): return - # attr regex + # attr regex compiler-expression detect_arch = ( - ("cc_on_x64", ".*(x|x86_|amd)64.*"), - ("cc_on_x86", ".*(win32|x86|i386|i686).*"), - ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*"), - ("cc_on_ppc64", ".*(powerpc|ppc)64.*"), - ("cc_on_aarch64", ".*(aarch64|arm64).*"), - ("cc_on_armhf", ".*arm.*"), - ("cc_on_s390x", ".*s390x.*"), + ("cc_on_x64", ".*(x|x86_|amd)64.*", ""), + ("cc_on_x86", ".*(win32|x86|i386|i686).*", ""), + ("cc_on_ppc64le", ".*(powerpc|ppc)64(el|le).*", ""), + ("cc_on_ppc64", ".*(powerpc|ppc)64.*", ""), + ("cc_on_aarch64", ".*(aarch64|arm64).*", ""), + ("cc_on_armhf", ".*arm.*", "defined(__ARM_ARCH_7__) || " + "defined(__ARM_ARCH_7A__)"), + ("cc_on_s390x", ".*s390x.*", ""), # undefined platform - ("cc_on_noarch", ""), + ("cc_on_noarch", "", ""), ) detect_compiler = ( - ("cc_is_gcc", r".*(gcc|gnu\-g).*"), - ("cc_is_clang", ".*clang.*"), - ("cc_is_iccw", ".*(intelw|intelemw|iccw).*"), # intel msvc like - ("cc_is_icc", ".*(intel|icc).*"), # intel unix like - ("cc_is_msvc", ".*msvc.*"), + ("cc_is_gcc", r".*(gcc|gnu\-g).*", ""), + ("cc_is_clang", ".*clang.*", ""), + # intel msvc like + ("cc_is_iccw", ".*(intelw|intelemw|iccw).*", ""), + ("cc_is_icc", ".*(intel|icc).*", ""), # intel unix like + ("cc_is_msvc", ".*msvc.*", ""), # undefined compiler will be treat it as gcc - ("cc_is_nocc", ""), + ("cc_is_nocc", "", ""), ) detect_args = ( - ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*"), - ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*"), + ("cc_has_debug", ".*(O0|Od|ggdb|coverage|debug:full).*", ""), + ("cc_has_native", ".*(-march=native|-xHost|/QxHost).*", ""), # in case if the class run with -DNPY_DISABLE_OPTIMIZATION - ("cc_noopt", ".*DISABLE_OPT.*"), + ("cc_noopt", ".*DISABLE_OPT.*", ""), ) dist_info = self.dist_info() platform, compiler_info, extra_args = dist_info # set False to all attrs for section in (detect_arch, detect_compiler, detect_args): - for attr, rgex in section: + for attr, rgex, cexpr in section: setattr(self, attr, False) for detect, searchin in ((detect_arch, platform), (detect_compiler, compiler_info)): - for attr, rgex in detect: + for attr, rgex, cexpr in detect: if rgex and not re.match(rgex, searchin, re.IGNORECASE): continue + if cexpr and not self.cc_test_cexpr(cexpr): + continue setattr(self, attr, True) break - for attr, rgex in detect_args: + for attr, rgex, cexpr in detect_args: if rgex and not re.match(rgex, extra_args, re.IGNORECASE): continue + if cexpr and not self.cc_test_cexpr(cexpr): + continue setattr(self, attr, True) if self.cc_on_noarch: @@ -1071,6 +1077,25 @@ class _CCompiler: self.dist_log("testing failed", stderr=True) return test + @_Cache.me + def cc_test_cexpr(self, cexpr, flags=[]): + """ + Same as the above but supports compile-time expressions. + """ + self.dist_log("testing compiler expression", cexpr) + test_path = os.path.join(self.conf_tmp_path, "npy_dist_test_cexpr.c") + with open(test_path, "w") as fd: + fd.write(textwrap.dedent(f"""\ + #if !({cexpr}) + #error "unsupported expression" + #endif + int dummy; + """)) + test = self.dist_test(test_path, flags) + if not test: + self.dist_log("testing failed", stderr=True) + return test + def cc_normalize_flags(self, flags): """ Remove the conflicts that caused due gathering implied features flags. diff --git a/numpy/distutils/checks/cpu_asimd.c b/numpy/distutils/checks/cpu_asimd.c index 8df556b6c..fc408feb0 100644 --- a/numpy/distutils/checks/cpu_asimd.c +++ b/numpy/distutils/checks/cpu_asimd.c @@ -3,9 +3,10 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f); + float *src = (float*)argv[argc-1]; + float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]); /* MAXMIN */ int ret = (int)vgetq_lane_f32(vmaxnmq_f32(v1, v2), 0); ret += (int)vgetq_lane_f32(vminnmq_f32(v1, v2), 0); @@ -13,7 +14,8 @@ int main(void) ret += (int)vgetq_lane_f32(vrndq_f32(v1), 0); #ifdef __aarch64__ { - float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0); + double *src2 = (float*)argv[argc-1]; + float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]); /* MAXMIN */ ret += (int)vgetq_lane_f64(vmaxnmq_f64(vd1, vd2), 0); ret += (int)vgetq_lane_f64(vminnmq_f64(vd1, vd2), 0); diff --git a/numpy/distutils/checks/cpu_asimddp.c b/numpy/distutils/checks/cpu_asimddp.c index 0158d1354..e7068ce02 100644 --- a/numpy/distutils/checks/cpu_asimddp.c +++ b/numpy/distutils/checks/cpu_asimddp.c @@ -3,9 +3,10 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - uint8x16_t v1 = vdupq_n_u8((unsigned char)1), v2 = vdupq_n_u8((unsigned char)2); + unsigned char *src = (unsigned char*)argv[argc-1]; + uint8x16_t v1 = vdupq_n_u8(src[0]), v2 = vdupq_n_u8(src[1]); uint32x4_t va = vdupq_n_u32(3); int ret = (int)vgetq_lane_u32(vdotq_u32(va, v1, v2), 0); #ifdef __aarch64__ diff --git a/numpy/distutils/checks/cpu_asimdfhm.c b/numpy/distutils/checks/cpu_asimdfhm.c index cb49751c4..54e328098 100644 --- a/numpy/distutils/checks/cpu_asimdfhm.c +++ b/numpy/distutils/checks/cpu_asimdfhm.c @@ -3,12 +3,14 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - float16x8_t vhp = vdupq_n_f16((float16_t)1); - float16x4_t vlhp = vdup_n_f16((float16_t)1); - float32x4_t vf = vdupq_n_f32(1.0f); - float32x2_t vlf = vdup_n_f32(1.0f); + float16_t *src = (float16_t*)argv[argc-1]; + float *src2 = (float*)argv[argc-2]; + float16x8_t vhp = vdupq_n_f16(src[0]); + float16x4_t vlhp = vdup_n_f16(src[1]); + float32x4_t vf = vdupq_n_f32(src2[0]); + float32x2_t vlf = vdup_n_f32(src2[1]); int ret = (int)vget_lane_f32(vfmlal_low_f16(vlf, vlhp, vlhp), 0); ret += (int)vgetq_lane_f32(vfmlslq_high_f16(vf, vhp, vhp), 0); diff --git a/numpy/distutils/checks/cpu_asimdhp.c b/numpy/distutils/checks/cpu_asimdhp.c index 80b94000f..e2de0306e 100644 --- a/numpy/distutils/checks/cpu_asimdhp.c +++ b/numpy/distutils/checks/cpu_asimdhp.c @@ -3,10 +3,11 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - float16x8_t vhp = vdupq_n_f16((float16_t)-1); - float16x4_t vlhp = vdup_n_f16((float16_t)-1); + float16_t *src = (float16_t*)argv[argc-1]; + float16x8_t vhp = vdupq_n_f16(src[0]); + float16x4_t vlhp = vdup_n_f16(src[1]); int ret = (int)vgetq_lane_f16(vabdq_f16(vhp, vhp), 0); ret += (int)vget_lane_f16(vabd_f16(vlhp, vlhp), 0); diff --git a/numpy/distutils/checks/cpu_neon.c b/numpy/distutils/checks/cpu_neon.c index 4eab1f384..8c64f864d 100644 --- a/numpy/distutils/checks/cpu_neon.c +++ b/numpy/distutils/checks/cpu_neon.c @@ -3,12 +3,16 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - float32x4_t v1 = vdupq_n_f32(1.0f), v2 = vdupq_n_f32(2.0f); + // passing from untraced pointers to avoid optimizing out any constants + // so we can test against the linker. + float *src = (float*)argv[argc-1]; + float32x4_t v1 = vdupq_n_f32(src[0]), v2 = vdupq_n_f32(src[1]); int ret = (int)vgetq_lane_f32(vmulq_f32(v1, v2), 0); #ifdef __aarch64__ - float64x2_t vd1 = vdupq_n_f64(1.0), vd2 = vdupq_n_f64(2.0); + double *src2 = (double*)argv[argc-2]; + float64x2_t vd1 = vdupq_n_f64(src2[0]), vd2 = vdupq_n_f64(src2[1]); ret += (int)vgetq_lane_f64(vmulq_f64(vd1, vd2), 0); #endif return ret; diff --git a/numpy/distutils/checks/cpu_neon_fp16.c b/numpy/distutils/checks/cpu_neon_fp16.c index 745d2e793..f3b949770 100644 --- a/numpy/distutils/checks/cpu_neon_fp16.c +++ b/numpy/distutils/checks/cpu_neon_fp16.c @@ -3,9 +3,9 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - short z4[] = {0, 0, 0, 0, 0, 0, 0, 0}; - float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16((const short*)z4)); + short *src = (short*)argv[argc-1]; + float32x4_t v_z4 = vcvt_f32_f16((float16x4_t)vld1_s16(src)); return (int)vgetq_lane_f32(v_z4, 0); } diff --git a/numpy/distutils/checks/cpu_neon_vfpv4.c b/numpy/distutils/checks/cpu_neon_vfpv4.c index 45f7b5d69..a039159dd 100644 --- a/numpy/distutils/checks/cpu_neon_vfpv4.c +++ b/numpy/distutils/checks/cpu_neon_vfpv4.c @@ -3,16 +3,18 @@ #endif #include <arm_neon.h> -int main(void) +int main(int argc, char **argv) { - float32x4_t v1 = vdupq_n_f32(1.0f); - float32x4_t v2 = vdupq_n_f32(2.0f); - float32x4_t v3 = vdupq_n_f32(3.0f); + float *src = (float*)argv[argc-1]; + float32x4_t v1 = vdupq_n_f32(src[0]); + float32x4_t v2 = vdupq_n_f32(src[1]); + float32x4_t v3 = vdupq_n_f32(src[2]); int ret = (int)vgetq_lane_f32(vfmaq_f32(v1, v2, v3), 0); #ifdef __aarch64__ - float64x2_t vd1 = vdupq_n_f64(1.0); - float64x2_t vd2 = vdupq_n_f64(2.0); - float64x2_t vd3 = vdupq_n_f64(3.0); + double *src2 = (double*)argv[argc-2]; + float64x2_t vd1 = vdupq_n_f64(src2[0]); + float64x2_t vd2 = vdupq_n_f64(src2[1]); + float64x2_t vd3 = vdupq_n_f64(src2[2]); ret += (int)vgetq_lane_f64(vfmaq_f64(vd1, vd2, vd3), 0); #endif return ret; |
