diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2021-06-27 16:35:28 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-06-27 16:35:28 -0600 |
commit | a11945d7dc930afe47e1cf7f81cb69f2958f4c1f (patch) | |
tree | 5fb197a926cb6573d3311294d1c0a7085a839465 /numpy/core | |
parent | c3faa8e566a8f37334fb347981f608c420687d2d (diff) | |
parent | 51a089f5958fa0ec65b3f524d009f0e8d748d862 (diff) | |
download | numpy-a11945d7dc930afe47e1cf7f81cb69f2958f4c1f.tar.gz |
Merge pull request #19362 from seiko2plus/issue_19319
BUG, SIMD: Fix detecting AVX512 features on Darwin
Diffstat (limited to 'numpy/core')
-rw-r--r-- | numpy/core/src/common/npy_cpu_features.c.src | 24 |
1 files changed, 23 insertions, 1 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 4f3a95c71..1e0f4a571 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -394,8 +394,30 @@ npy__cpu_init_features(void)
     npy__cpu_have[NPY_CPU_FEATURE_FMA] = npy__cpu_have[NPY_CPU_FEATURE_FMA3];
 
     // check AVX512 OS support
-    if ((xcr & 0xe6) != 0xe6)
+    int avx512_os = (xcr & 0xe6) == 0xe6;
+#if defined(__APPLE__) && defined(__x86_64__)
+    /**
+     * On darwin, machines with AVX512 support, by default, threads are created with
+     * AVX512 masked off in XCR0 and an AVX-sized savearea is used.
+     * However, AVX512 capabilities are advertised in the commpage and via sysctl.
+     * for more information, check:
+     *  - https://github.com/apple/darwin-xnu/blob/0a798f6738bc1db01281fc08ae024145e84df927/osfmk/i386/fpu.c#L175-L201
+     *  - https://github.com/golang/go/issues/43089
+     *  - https://github.com/numpy/numpy/issues/19319
+     */
+    if (!avx512_os) {
+        npy_uintp commpage64_addr = 0x00007fffffe00000ULL;
+        npy_uint16 commpage64_ver = *((npy_uint16*)(commpage64_addr + 0x01E));
+        // cpu_capabilities64 undefined in versions < 13
+        if (commpage64_ver > 12) {
+            npy_uint64 commpage64_cap = *((npy_uint64*)(commpage64_addr + 0x010));
+            avx512_os = (commpage64_cap & 0x0000004000000000ULL) != 0;
+        }
+    }
+#endif
+    if (!avx512_os) {
         return;
+    }
     npy__cpu_have[NPY_CPU_FEATURE_AVX512F] = (reg[1] & (1 << 16)) != 0;
     npy__cpu_have[NPY_CPU_FEATURE_AVX512CD] = (reg[1] & (1 << 28)) != 0;
     if (npy__cpu_have[NPY_CPU_FEATURE_AVX512F] && npy__cpu_have[NPY_CPU_FEATURE_AVX512CD]) {