summary | refs | log | tree | commit | diff
path: root/numpy/core
diff options
context:
space:
mode:
author: Chunlin <fangchunlin@huawei.com> 2020-05-26 18:44:45 +0800
committer: GitHub <noreply@github.com> 2020-05-26 13:44:45 +0300
commit: 543655176a05e2f6a54dbbadfb754416f14fb215 (patch)
tree: 984af220bf412c64e358d1fc0435862fb3c5fdf6 /numpy/core
parent: 3f11db40d80b6ff122f41185b0b93637bdff0e39 (diff)
download: numpy-543655176a05e2f6a54dbbadfb754416f14fb215.tar.gz
ENH: Optimize Cpu feature detect in X86, fix for GCC on macOS (#16297)
* Fix cpu detection for X86, GCC, and macOS
* Optimize feature detection

Co-authored-by: Matti Picus <matti.picus@gmail.com>
Diffstat (limited to 'numpy/core')
-rw-r--r-- numpy/core/src/common/npy_cpu_features.c.src | 20
1 files changed, 14 insertions, 6 deletions
diff --git a/numpy/core/src/common/npy_cpu_features.c.src b/numpy/core/src/common/npy_cpu_features.c.src
index 4f193a471..e676932c9 100644
--- a/numpy/core/src/common/npy_cpu_features.c.src
+++ b/numpy/core/src/common/npy_cpu_features.c.src
@@ -76,11 +76,13 @@ npy__cpu_getxcr0(void)
#if defined(_MSC_VER) || defined (__INTEL_COMPILER)
return _xgetbv(0);
#elif defined(__GNUC__) || defined(__clang__)
+ /* named form of xgetbv not supported on OSX, so must use byte form, see:
+ * https://github.com/asmjit/asmjit/issues/78
+ */
unsigned int eax, edx;
- __asm__("xgetbv" : "=a" (eax), "=d" (edx) : "c" (0));
- return (eax | (unsigned long long)edx << 32);
+ __asm(".byte 0x0F, 0x01, 0xd0" : "=a"(eax), "=d"(edx) : "c"(0));
+ return eax;
#else
- // TODO: handle other x86 compilers
return 0;
#endif
}
@@ -110,7 +112,6 @@ npy__cpu_cpuid(int reg[4], int func_id)
);
#endif
#else
- // TODO: handle other x86 compilers
reg[0] = 0;
#endif
}
@@ -123,8 +124,15 @@ npy__cpu_init_features(void)
// validate platform support
int reg[] = {0, 0, 0, 0};
npy__cpu_cpuid(reg, 0);
- if (reg[0] == 0)
- return;
+ if (reg[0] == 0) {
+ npy__cpu_have[NPY_CPU_FEATURE_MMX] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_SSE] = 1;
+ npy__cpu_have[NPY_CPU_FEATURE_SSE2] = 1;
+ #ifdef NPY_CPU_AMD64
+ npy__cpu_have[NPY_CPU_FEATURE_SSE3] = 1;
+ #endif
+ return;
+ }
npy__cpu_cpuid(reg, 1);
npy__cpu_have[NPY_CPU_FEATURE_MMX] = (reg[3] & (1 << 23)) != 0;