summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
author Charles Harris <charlesr.harris@gmail.com> 2021-06-27 16:35:28 -0600
committer GitHub <noreply@github.com> 2021-06-27 16:35:28 -0600
commit a11945d7dc930afe47e1cf7f81cb69f2958f4c1f (patch)
tree 5fb197a926cb6573d3311294d1c0a7085a839465 /numpy/core
parent c3faa8e566a8f37334fb347981f608c420687d2d (diff)
parent 51a089f5958fa0ec65b3f524d009f0e8d748d862 (diff)
download numpy-a11945d7dc930afe47e1cf7f81cb69f2958f4c1f.tar.gz
Merge pull request #19362 from seiko2plus/issue_19319
BUG, SIMD: Fix detecting AVX512 features on Darwin
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/common/npy_cpu_features.c.src 24
1 file changed, 23 insertions, 1 deletion
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 4f3a95c71..1e0f4a571 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -394,8 +394,30 @@ npy__cpu_init_features(void)
npy__cpu_have[NPY_CPU_FEATURE_FMA] = npy__cpu_have[NPY_CPU_FEATURE_FMA3];
// check AVX512 OS support
- if ((xcr & 0xe6) != 0xe6)
+ int avx512_os = (xcr & 0xe6) == 0xe6;
+#if defined(__APPLE__) && defined(__x86_64__)
+ /**
+ * On darwin, machines with AVX512 support, by default, threads are created with
+ * AVX512 masked off in XCR0 and an AVX-sized savearea is used.
+ * However, AVX512 capabilities are advertised in the commpage and via sysctl.
+ * for more information, check:
+ * - https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L175-L201
+ * - https://github.com/golang/go/issues/43089
+ * - https://github.com/numpy/numpy/issues/19319
+ */
+ if (!avx512_os) {
+ npy_uintp commpage64_addr = 0x00007fffffe00000ULL;
+ npy_uint16 commpage64_ver = *((npy_uint16*)(commpage64_addr + 0x01E));
+ // cpu_capabilities64 undefined in versions < 13
+ if (commpage64_ver > 12) {
+ npy_uint64 commpage64_cap = *((npy_uint64*)(commpage64_addr + 0x010));
+ avx512_os = (commpage64_cap & 0x0000004000000000ULL) != 0;
+ }
+ }
+#endif
+ if (!avx512_os) {
return;
+ }
npy__cpu_have[NPY_CPU_FEATURE_AVX512F] = (reg[1] & (1 << 16)) != 0;
npy__cpu_have[NPY_CPU_FEATURE_AVX512CD] = (reg[1] & (1 << 28)) != 0;
if (npy__cpu_have[NPY_CPU_FEATURE_AVX512F] && npy__cpu_have[NPY_CPU_FEATURE_AVX512CD]) {