 numpy/core/src/multiarray/array_assign.c               |  3
 numpy/core/src/multiarray/array_assign_scalar.c        |  6
 numpy/core/src/multiarray/ctors.c                      | 56
 numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 33
 numpy/core/src/private/lowlevel_strided_loops.h        | 76
 numpy/core/src/umath/loops.c.src                       |  5
 6 files changed, 117 insertions(+), 62 deletions(-)
diff --git a/numpy/core/src/multiarray/array_assign.c b/numpy/core/src/multiarray/array_assign.c
index 6467b6cfd..fa764d758 100644
--- a/numpy/core/src/multiarray/array_assign.c
+++ b/numpy/core/src/multiarray/array_assign.c
@@ -22,6 +22,7 @@
#include "array_assign.h"
#include "common.h"
+#include "lowlevel_strided_loops.h"
/* See array_assign.h for parameter documentation */
NPY_NO_EXPORT int
@@ -92,7 +93,7 @@ raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
align_check |= strides[idim];
}
- return ((align_check & (alignment - 1)) == 0);
+ return npy_is_aligned((void *)align_check, alignment);
}
else {
return 1;
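
For reference, the combined check in raw_array_is_aligned works because OR-ing the base address with every stride accumulates any low bit that could break alignment, so a single mask test covers all element addresses. A minimal standalone sketch of the idea (plain C types instead of NumPy's npy_* typedefs; not part of the patch):

    #include <stdint.h>

    /* non-zero low bits in the base or any stride imply that some
     * element address is misaligned; alignment must be a power of two */
    static int all_elements_aligned(const char *base, const intptr_t *strides,
                                    int ndim, uintptr_t alignment)
    {
        uintptr_t check = (uintptr_t)base;
        for (int i = 0; i < ndim; i++) {
            check |= (uintptr_t)strides[i];
        }
        return (check & (alignment - 1)) == 0;
    }
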
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 2c1154264..df7facad6 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -48,7 +48,7 @@ raw_array_assign_scalar(int ndim, npy_intp *shape,
/* Check alignment */
aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
dst_dtype->alignment);
- if (((npy_intp)src_data & (src_dtype->alignment - 1)) != 0) {
+ if (!npy_is_aligned(src_data, src_dtype->alignment)) {
aligned = 0;
}
@@ -119,7 +119,7 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,
/* Check alignment */
aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
dst_dtype->alignment);
- if (((npy_intp)src_data & (src_dtype->alignment - 1)) != 0) {
+ if (!npy_is_aligned(src_data, src_dtype->alignment)) {
aligned = 0;
}
@@ -220,7 +220,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
* we also skip this if 'dst' has an object dtype.
*/
if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) ||
- ((npy_intp)src_data & (src_dtype->alignment - 1)) != 0) &&
+ !npy_is_aligned(src_data, src_dtype->alignment)) &&
PyArray_SIZE(dst) > 1 &&
!PyDataType_REFCHK(PyArray_DESCR(dst))) {
char *tmp_src_data;
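
All three hunks above replace the open-coded mask test with the new helper. The fallback in PyArray_AssignRawScalar exists because an unaligned source scalar would otherwise force the slow path for every element of 'dst'; copying it once into aligned storage pays for itself when the array has more than one element. A hedged standalone sketch of that idea (hypothetical helper, not the NumPy API; the real code allocates the temporary on the heap):

    #include <stdint.h>
    #include <string.h>

    union aligned_buf { char bytes[16]; uint64_t force_align; };

    /* if src is unaligned, copy it into the aligned buffer once so the
     * per-element assignment loop can take the fast aligned path */
    static const char *aligned_scalar(const char *src, size_t itemsize,
                                      uintptr_t alignment,
                                      union aligned_buf *tmp)
    {
        if (((uintptr_t)src & (alignment - 1)) != 0 &&
                itemsize <= sizeof(tmp->bytes)) {
            memcpy(tmp->bytes, src, itemsize);  /* memcpy tolerates any src */
            return tmp->bytes;
        }
        return src;
    }
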
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index b1a9d9859..5c692bd02 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -311,38 +311,42 @@ _strided_byte_swap(void *p, npy_intp stride, npy_intp n, int size)
case 1: /* no byteswap necessary */
break;
case 4:
- for (a = (char*)p; n > 0; n--, a += stride) {
- npy_uint32 * a_ = (npy_uint32 *)a;
-#ifdef HAVE___BUILTIN_BSWAP32
- *a_ = __builtin_bswap32(*a_);
-#else
- /* a decent compiler can convert this to bswap too */
- *a_ = ((*a_ & 0xff000000u) >> 24) | ((*a_ & 0x00ff0000u) >> 8) |
- ((*a_ & 0x0000ff00u) << 8) | ((*a_ & 0x000000ffu) << 24);
-#endif
+ if (npy_is_aligned((void *)((npy_intp)p | stride), sizeof(npy_uint32))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint32 * a_ = (npy_uint32 *)a;
+ *a_ = npy_bswap4(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap4_unaligned(a);
+ }
}
break;
case 8:
- for (a = (char*)p; n > 0; n--) {
-#ifdef HAVE___BUILTIN_BSWAP64
- npy_uint64 * a_ = (npy_uint64 *)a;
- *a_ = __builtin_bswap64(*a_);
- a += stride;
-#else
- /* mask version would be faster but requires C99 */
- b = a + 7;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a = *b; *b = c;
- a += stride - 3;
-#endif
+ if (npy_is_aligned((void *)((npy_intp)p | stride), sizeof(npy_uint64))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint64 * a_ = (npy_uint64 *)a;
+ *a_ = npy_bswap8(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap8_unaligned(a);
+ }
}
break;
case 2:
- for (a = (char*)p; n > 0; n--, a += stride) {
- npy_uint16 * a_ = (npy_uint16 *)a;
- *a_ = (((*a_ >> 8) & 0xffu) | ((*a_ & 0xffu) << 8));
+ if (npy_is_aligned((void *)((npy_intp)p | stride), sizeof(npy_uint16))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint16 * a_ = (npy_uint16 *)a;
+ *a_ = npy_bswap2(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap2_unaligned(a);
+ }
}
break;
default:
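
The rewritten _strided_byte_swap thus picks a whole-word swap when every element stays aligned and falls back to byte moves otherwise; note the alignment check must cover the stride as well as the base pointer, since an odd stride misaligns later elements even when the first is aligned. A standalone sketch of the 16-bit case (not part of the patch):

    #include <stdint.h>

    static void bswap16_strided(char *p, intptr_t stride, intptp n);

    static void bswap16_strided(char *p, intptr_t stride, intptr_t n)
    {
        /* base and stride must both be multiples of the item size for
         * the word loop to stay aligned at every element */
        if ((((uintptr_t)p | (uintptr_t)stride) & (sizeof(uint16_t) - 1)) == 0) {
            for (; n > 0; n--, p += stride) {
                uint16_t *v = (uint16_t *)p;
                *v = (uint16_t)((uint16_t)(*v << 8) | (*v >> 8));
            }
        }
        else {
            for (; n > 0; n--, p += stride) {
                char t = p[0]; p[0] = p[1]; p[1] = t;  /* byte moves */
            }
        }
    }
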
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index ef29b855e..5c02c6e9f 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -36,27 +36,16 @@
#define _NPY_NOP4(x) (x)
#define _NPY_NOP8(x) (x)
-#define _NPY_SWAP2(x) (((((npy_uint16)x)&0xffu) << 8) | \
- (((npy_uint16)x) >> 8))
+#define _NPY_SWAP2(x) npy_bswap2(x)
-#define _NPY_SWAP4(x) (((((npy_uint32)x)&0xffu) << 24) | \
- ((((npy_uint32)x)&0xff00u) << 8) | \
- ((((npy_uint32)x)&0xff0000u) >> 8) | \
- (((npy_uint32)x) >> 24))
+#define _NPY_SWAP4(x) npy_bswap4(x)
#define _NPY_SWAP_PAIR4(x) (((((npy_uint32)x)&0xffu) << 8) | \
((((npy_uint32)x)&0xff00u) >> 8) | \
((((npy_uint32)x)&0xff0000u) << 8) | \
((((npy_uint32)x)&0xff000000u) >> 8))
-#define _NPY_SWAP8(x) (((((npy_uint64)x)&0xffULL) << 56) | \
- ((((npy_uint64)x)&0xff00ULL) << 40) | \
- ((((npy_uint64)x)&0xff0000ULL) << 24) | \
- ((((npy_uint64)x)&0xff000000ULL) << 8) | \
- ((((npy_uint64)x)&0xff00000000ULL) >> 8) | \
- ((((npy_uint64)x)&0xff0000000000ULL) >> 24) | \
- ((((npy_uint64)x)&0xff000000000000ULL) >> 40) | \
- (((npy_uint64)x) >> 56))
+#define _NPY_SWAP8(x) npy_bswap8(x)
#define _NPY_SWAP_PAIR8(x) (((((npy_uint64)x)&0xffULL) << 24) | \
((((npy_uint64)x)&0xff00ULL) << 8) | \
@@ -67,21 +56,11 @@
((((npy_uint64)x)&0xff000000000000ULL) >> 8) | \
((((npy_uint64)x)&0xff00000000000000ULL) >> 24))
-#define _NPY_SWAP_INPLACE2(x) { \
- char a = (x)[0]; (x)[0] = (x)[1]; (x)[1] = a; \
- }
+#define _NPY_SWAP_INPLACE2(x) npy_bswap2_unaligned(x)
-#define _NPY_SWAP_INPLACE4(x) { \
- char a = (x)[0]; (x)[0] = (x)[3]; (x)[3] = a; \
- a = (x)[1]; (x)[1] = (x)[2]; (x)[2] = a; \
- }
+#define _NPY_SWAP_INPLACE4(x) npy_bswap4_unaligned(x)
-#define _NPY_SWAP_INPLACE8(x) { \
- char a = (x)[0]; (x)[0] = (x)[7]; (x)[7] = a; \
- a = (x)[1]; (x)[1] = (x)[6]; (x)[6] = a; \
- a = (x)[2]; (x)[2] = (x)[5]; (x)[5] = a; \
- a = (x)[3]; (x)[3] = (x)[4]; (x)[4] = a; \
- }
+#define _NPY_SWAP_INPLACE8(x) npy_bswap8_unaligned(x)
#define _NPY_SWAP_INPLACE16(x) { \
char a = (x)[0]; (x)[0] = (x)[15]; (x)[15] = a; \
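
The _NPY_SWAP_PAIR variants stay open-coded: they byteswap each half of the value independently (for complex types, the two components are swapped in place, not exchanged with each other). A quick standalone check of the two semantics (not part of the patch):

    #include <assert.h>
    #include <stdint.h>

    int main(void)
    {
        uint32_t x = 0x11223344u;
        /* full swap reverses all four bytes */
        uint32_t full = (x << 24) | ((x & 0xff00u) << 8) |
                        ((x >> 8) & 0xff00u) | (x >> 24);
        /* pair swap reverses the bytes within each 16-bit half */
        uint32_t pair = ((x & 0x00ff00ffu) << 8) | ((x >> 8) & 0x00ff00ffu);
        assert(full == 0x44332211u);
        assert(pair == 0x22114433u);
        return 0;
    }
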
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index 742882a92..fffd02e03 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -1,5 +1,6 @@
#ifndef __LOWLEVEL_STRIDED_LOOPS_H
#define __LOWLEVEL_STRIDED_LOOPS_H
+#include <npy_config.h>
/*
* NOTE: This API should remain private for the time being, to allow
@@ -396,6 +397,14 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
char **out_dataB, npy_intp *out_stridesB,
char **out_dataC, npy_intp *out_stridesC);
+/*
+ * Return true if the pointer is aligned to 'alignment' ('alignment' must be a power of two)
+ */
+static NPY_INLINE int
+npy_is_aligned(const void * p, const npy_uintp alignment)
+{
+ return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
+}
/*
* Return number of elements that must be peeled from
@@ -441,6 +450,73 @@ npy_blocked_end(const npy_intp offset, const npy_intp esize,
}
+/* byte swapping functions */
+static NPY_INLINE npy_uint16
+npy_bswap2(npy_uint16 x)
+{
+ return ((x & 0xffu) << 8) | (x >> 8);
+}
+
+/*
+ * Byteswap a 16-bit value in possibly unaligned memory using single
+ * byte moves; some CPUs do not support unaligned word access.
+ */
+static NPY_INLINE void
+npy_bswap2_unaligned(char * x)
+{
+ char a = x[0];
+ x[0] = x[1];
+ x[1] = a;
+}
+
+static NPY_INLINE npy_uint32
+npy_bswap4(npy_uint32 x)
+{
+#ifdef HAVE___BUILTIN_BSWAP32
+ return __builtin_bswap32(x);
+#else
+ return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
+ ((x & 0xff0000u) >> 8) | (x >> 24);
+#endif
+}
+
+static NPY_INLINE void
+npy_bswap4_unaligned(char * x)
+{
+ char a = x[0];
+ x[0] = x[3];
+ x[3] = a;
+ a = x[1];
+ x[1] = x[2];
+ x[2] = a;
+}
+
+static NPY_INLINE npy_uint64
+npy_bswap8(npy_uint64 x)
+{
+#ifdef HAVE___BUILTIN_BSWAP64
+ return __builtin_bswap64(x);
+#else
+ return ((x & 0xffULL) << 56) |
+ ((x & 0xff00ULL) << 40) |
+ ((x & 0xff0000ULL) << 24) |
+ ((x & 0xff000000ULL) << 8) |
+ ((x & 0xff00000000ULL) >> 8) |
+ ((x & 0xff0000000000ULL) >> 24) |
+ ((x & 0xff000000000000ULL) >> 40) |
+ ( x >> 56);
+#endif
+}
+
+static NPY_INLINE void
+npy_bswap8_unaligned(char * x)
+{
+ char a = x[0]; x[0] = x[7]; x[7] = a;
+ a = x[1]; x[1] = x[6]; x[6] = a;
+ a = x[2]; x[2] = x[5]; x[5] = a;
+ a = x[3]; x[3] = x[4]; x[4] = a;
+}
+
/* Start raw iteration */
#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
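
The helpers above rely on the usual power-of-two trick: for a power-of-two A, addr % A equals addr & (A - 1), which is exactly what npy_is_aligned tests. A tiny standalone illustration (not part of the patch):

    #include <stdint.h>
    #include <stdio.h>

    static int is_aligned(const void *p, uintptr_t alignment)
    {
        return ((uintptr_t)p & (alignment - 1)) == 0;  /* alignment: power of two */
    }

    int main(void)
    {
        uint64_t word = 0;
        char *p = (char *)&word;     /* 8-byte aligned via the uint64_t */
        printf("%d %d\n", is_aligned(p, 8), is_aligned(p + 1, 2));  /* 1 0 */
        return 0;
    }
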
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e307faa46..5eae448ee 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -34,11 +34,6 @@
*/
-static NPY_INLINE int npy_is_aligned(const void * p, const npy_intp alignment)
-{
- return ((npy_intp)(p) & ((alignment) - 1)) == 0;
-}
-
#define IS_BINARY_REDUCE ((args[0] == args[2])\
&& (steps[0] == steps[2])\
&& (steps[0] == 0))