summary refs log tree commit diff
path: root/numpy/core
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/common/npy_cpu_features.c.src 24
1 files changed, 23 insertions, 1 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 4f3a95c71..1e0f4a571 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -394,8 +394,30 @@ npy__cpu_init_features(void)
npy__cpu_have[NPY_CPU_FEATURE_FMA] = npy__cpu_have[NPY_CPU_FEATURE_FMA3];
// check AVX512 OS support
- if ((xcr & 0xe6) != 0xe6)
+ int avx512_os = (xcr & 0xe6) == 0xe6;
+#if defined(__APPLE__) && defined(__x86_64__)
+ /**
+ * On Darwin machines with AVX512 support, threads are created by default with
+ * AVX512 masked off in XCR0, and an AVX-sized save area is used.
+ * However, AVX512 capabilities are advertised in the commpage and via sysctl.
+ * For more information, see:
+ * - https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L175-L201
+ * - https://github.com/golang/go/issues/43089
+ * - https://github.com/numpy/numpy/issues/19319
+ */
+ if (!avx512_os) {
+ npy_uintp commpage64_addr = 0x00007fffffe00000ULL;
+ npy_uint16 commpage64_ver = *((npy_uint16*)(commpage64_addr + 0x01E));
+ // cpu_capabilities64 undefined in versions < 13
+ if (commpage64_ver > 12) {
+ npy_uint64 commpage64_cap = *((npy_uint64*)(commpage64_addr + 0x010));
+ avx512_os = (commpage64_cap & 0x0000004000000000ULL) != 0;
+ }
+ }
+#endif
+ if (!avx512_os) {
return;
+ }
npy__cpu_have[NPY_CPU_FEATURE_AVX512F] = (reg[1] & (1 << 16)) != 0;
npy__cpu_have[NPY_CPU_FEATURE_AVX512CD] = (reg[1] & (1 << 28)) != 0;
if (npy__cpu_have[NPY_CPU_FEATURE_AVX512F] && npy__cpu_have[NPY_CPU_FEATURE_AVX512CD]) {