diff options
author | Chunlin <834352945@qq.com> | 2021-01-06 03:20:10 +0800 |
---|---|---|
committer | GitHub <noreply@github.com> | 2021-01-05 21:20:10 +0200 |
commit | da887a666ad975ece7fb7465005aa99c0ddef8d2 (patch) | |
tree | 554700ec26e710fae3a0275cce8179e3bea592ec /numpy | |
parent | 444f696f69c98208aab00ecbcec7dfd1689da9fb (diff) | |
download | numpy-da887a666ad975ece7fb7465005aa99c0ddef8d2.tar.gz |
MAINT: CPUs that support unaligned access. (#18065)
* add CPUs that support unaligned access.
* add comments demonstrating the common scenarios of unaligned access.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/include/numpy/npy_common.h | 8 | ||||
-rw-r--r-- | numpy/core/include/numpy/npy_cpu.h | 14 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.h | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/compiled_base.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/item_selection.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 2 |
6 files changed, 14 insertions, 16 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h index d5a586c56..c8495db8e 100644 --- a/numpy/core/include/numpy/npy_common.h +++ b/numpy/core/include/numpy/npy_common.h @@ -10,14 +10,6 @@ #include <npy_config.h> #endif -// int*, int64* should be propertly aligned on ARMv7 to avoid bus error -#if !defined(NPY_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64)) -#define NPY_STRONG_ALIGNMENT 1 -#endif -#if !defined(NPY_STRONG_ALIGNMENT) -#define NPY_STRONG_ALIGNMENT 0 -#endif - // compile time environment variables #ifndef NPY_RELAXED_STRIDES_CHECKING #define NPY_RELAXED_STRIDES_CHECKING 0 diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h index 4dbf9d84e..065176ac5 100644 --- a/numpy/core/include/numpy/npy_cpu.h +++ b/numpy/core/include/numpy/npy_cpu.h @@ -110,10 +110,16 @@ information about your platform (OS, CPU and compiler) #endif -#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64)) -#define NPY_CPU_HAVE_UNALIGNED_ACCESS 1 -#else -#define NPY_CPU_HAVE_UNALIGNED_ACCESS 0 +/* + * Except for the following architectures, memory access is limited to the natural + * alignment of data types otherwise it may lead to bus error or performance regression. + * For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt. 
+*/ +#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__) + #define NPY_ALIGNMENT_REQUIRED 0 +#endif +#ifndef NPY_ALIGNMENT_REQUIRED + #define NPY_ALIGNMENT_REQUIRED 1 #endif #endif diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h index ef9bc79da..2f2e7e25b 100644 --- a/numpy/core/src/multiarray/common.h +++ b/numpy/core/src/multiarray/common.h @@ -267,7 +267,7 @@ npy_memchr(char * haystack, char needle, } else { /* usually find elements to skip path */ - if (NPY_CPU_HAVE_UNALIGNED_ACCESS && needle == 0 && stride == 1) { + if (!NPY_ALIGNMENT_REQUIRED && needle == 0 && stride == 1) { /* iterate until last multiple of 4 */ char * block_end = haystack + size - (size % sizeof(unsigned int)); while (p < block_end) { diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c index 6ae4dda6b..fa5d7db75 100644 --- a/numpy/core/src/multiarray/compiled_base.c +++ b/numpy/core/src/multiarray/compiled_base.c @@ -1521,7 +1521,7 @@ pack_inner(const char *inptr, bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero)); bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero)); if(out_stride == 1 && - (!NPY_STRONG_ALIGNMENT || isAligned)) { + (!NPY_ALIGNMENT_REQUIRED || isAligned)) { npy_uint64 *ptr64 = (npy_uint64*)outptr; #if NPY_SIMD_WIDTH == 16 npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48); diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c index 77fff5eb4..8e4b2ebe1 100644 --- a/numpy/core/src/multiarray/item_selection.c +++ b/numpy/core/src/multiarray/item_selection.c @@ -2245,7 +2245,7 @@ count_boolean_trues(int ndim, char *data, npy_intp const *ashape, npy_intp const count += count_nonzero_bytes((const npy_uint8 *)d, stride); d += stride; #else - if (NPY_CPU_HAVE_UNALIGNED_ACCESS || + if (!NPY_ALIGNMENT_REQUIRED || npy_is_aligned(d, sizeof(npy_uint64))) { npy_uintp stride = 6 * 
sizeof(npy_uint64); for (; d < e - (shape[0] % stride); d += stride) { diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src index 0590558be..b8ebee6ed 100644 --- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src +++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src @@ -29,7 +29,7 @@ * instructions (16 byte). * So this flag can only be enabled if autovectorization is disabled. */ -#if NPY_CPU_HAVE_UNALIGNED_ACCESS +#if NPY_ALIGNMENT_REQUIRED # define NPY_USE_UNALIGNED_ACCESS 0 #else # define NPY_USE_UNALIGNED_ACCESS 0 |