summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author: Chunlin <834352945@qq.com> 2021-01-06 03:20:10 +0800
committer: GitHub <noreply@github.com> 2021-01-05 21:20:10 +0200
commit: da887a666ad975ece7fb7465005aa99c0ddef8d2 (patch)
tree: 554700ec26e710fae3a0275cce8179e3bea592ec /numpy
parent: 444f696f69c98208aab00ecbcec7dfd1689da9fb (diff)
download: numpy-da887a666ad975ece7fb7465005aa99c0ddef8d2.tar.gz
MAINT: CPUs that support unaligned access. (#18065)
* Add CPUs that support unaligned access. * Add comments demonstrating the common scenarios of unaligned access.
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/core/include/numpy/npy_common.h 8
-rw-r--r-- numpy/core/include/numpy/npy_cpu.h 14
-rw-r--r-- numpy/core/src/multiarray/common.h 2
-rw-r--r-- numpy/core/src/multiarray/compiled_base.c 2
-rw-r--r-- numpy/core/src/multiarray/item_selection.c 2
-rw-r--r-- numpy/core/src/multiarray/lowlevel_strided_loops.c.src 2
6 files changed, 14 insertions, 16 deletions
diff --git a/numpy/core/include/numpy/npy_common.h b/numpy/core/include/numpy/npy_common.h
index d5a586c56..c8495db8e 100644
--- a/numpy/core/include/numpy/npy_common.h
+++ b/numpy/core/include/numpy/npy_common.h
@@ -10,14 +10,6 @@
#include <npy_config.h>
#endif
-// int*, int64* should be propertly aligned on ARMv7 to avoid bus error
-#if !defined(NPY_STRONG_ALIGNMENT) && defined(__arm__) && !(defined(__aarch64__) || defined(_M_ARM64))
-#define NPY_STRONG_ALIGNMENT 1
-#endif
-#if !defined(NPY_STRONG_ALIGNMENT)
-#define NPY_STRONG_ALIGNMENT 0
-#endif
-
// compile time environment variables
#ifndef NPY_RELAXED_STRIDES_CHECKING
#define NPY_RELAXED_STRIDES_CHECKING 0
diff --git a/numpy/core/include/numpy/npy_cpu.h b/numpy/core/include/numpy/npy_cpu.h
index 4dbf9d84e..065176ac5 100644
--- a/numpy/core/include/numpy/npy_cpu.h
+++ b/numpy/core/include/numpy/npy_cpu.h
@@ -110,10 +110,16 @@
information about your platform (OS, CPU and compiler)
#endif
-#if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
-#define NPY_CPU_HAVE_UNALIGNED_ACCESS 1
-#else
-#define NPY_CPU_HAVE_UNALIGNED_ACCESS 0
+/*
+ * Except for the following architectures, memory access is limited to the natural
+ * alignment of data types; otherwise it may lead to a bus error or a performance regression.
+ * For more details about unaligned access, see https://www.kernel.org/doc/Documentation/unaligned-memory-access.txt.
+*/
+#if defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64) || defined(__aarch64__) || defined(__powerpc64__)
+ #define NPY_ALIGNMENT_REQUIRED 0
+#endif
+#ifndef NPY_ALIGNMENT_REQUIRED
+ #define NPY_ALIGNMENT_REQUIRED 1
#endif
#endif
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index ef9bc79da..2f2e7e25b 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -267,7 +267,7 @@ npy_memchr(char * haystack, char needle,
}
else {
/* usually find elements to skip path */
- if (NPY_CPU_HAVE_UNALIGNED_ACCESS && needle == 0 && stride == 1) {
+ if (!NPY_ALIGNMENT_REQUIRED && needle == 0 && stride == 1) {
/* iterate until last multiple of 4 */
char * block_end = haystack + size - (size % sizeof(unsigned int));
while (p < block_end) {
diff --git a/numpy/core/src/multiarray/compiled_base.c b/numpy/core/src/multiarray/compiled_base.c
index 6ae4dda6b..fa5d7db75 100644
--- a/numpy/core/src/multiarray/compiled_base.c
+++ b/numpy/core/src/multiarray/compiled_base.c
@@ -1521,7 +1521,7 @@ pack_inner(const char *inptr,
bb[2] = npyv_tobits_b8(npyv_cmpneq_u8(v2, v_zero));
bb[3] = npyv_tobits_b8(npyv_cmpneq_u8(v3, v_zero));
if(out_stride == 1 &&
- (!NPY_STRONG_ALIGNMENT || isAligned)) {
+ (!NPY_ALIGNMENT_REQUIRED || isAligned)) {
npy_uint64 *ptr64 = (npy_uint64*)outptr;
#if NPY_SIMD_WIDTH == 16
npy_uint64 bcomp = bb[0] | (bb[1] << 16) | (bb[2] << 32) | (bb[3] << 48);
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 77fff5eb4..8e4b2ebe1 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -2245,7 +2245,7 @@ count_boolean_trues(int ndim, char *data, npy_intp const *ashape, npy_intp const
count += count_nonzero_bytes((const npy_uint8 *)d, stride);
d += stride;
#else
- if (NPY_CPU_HAVE_UNALIGNED_ACCESS ||
+ if (!NPY_ALIGNMENT_REQUIRED ||
npy_is_aligned(d, sizeof(npy_uint64))) {
npy_uintp stride = 6 * sizeof(npy_uint64);
for (; d < e - (shape[0] % stride); d += stride) {
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index 0590558be..b8ebee6ed 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -29,7 +29,7 @@
* instructions (16 byte).
* So this flag can only be enabled if autovectorization is disabled.
*/
-#if NPY_CPU_HAVE_UNALIGNED_ACCESS
+#if NPY_ALIGNMENT_REQUIRED
# define NPY_USE_UNALIGNED_ACCESS 0
#else
# define NPY_USE_UNALIGNED_ACCESS 0