summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src33
-rw-r--r--numpy/core/src/private/lowlevel_strided_loops.h76
-rw-r--r--numpy/core/src/umath/loops.c.src5
3 files changed, 82 insertions, 32 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index ef29b855e..5c02c6e9f 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -36,27 +36,16 @@
#define _NPY_NOP4(x) (x)
#define _NPY_NOP8(x) (x)
-#define _NPY_SWAP2(x) (((((npy_uint16)x)&0xffu) << 8) | \
- (((npy_uint16)x) >> 8))
+#define _NPY_SWAP2(x) npy_bswap2(x)
-#define _NPY_SWAP4(x) (((((npy_uint32)x)&0xffu) << 24) | \
- ((((npy_uint32)x)&0xff00u) << 8) | \
- ((((npy_uint32)x)&0xff0000u) >> 8) | \
- (((npy_uint32)x) >> 24))
+#define _NPY_SWAP4(x) npy_bswap4(x)
#define _NPY_SWAP_PAIR4(x) (((((npy_uint32)x)&0xffu) << 8) | \
((((npy_uint32)x)&0xff00u) >> 8) | \
((((npy_uint32)x)&0xff0000u) << 8) | \
((((npy_uint32)x)&0xff000000u) >> 8))
-#define _NPY_SWAP8(x) (((((npy_uint64)x)&0xffULL) << 56) | \
- ((((npy_uint64)x)&0xff00ULL) << 40) | \
- ((((npy_uint64)x)&0xff0000ULL) << 24) | \
- ((((npy_uint64)x)&0xff000000ULL) << 8) | \
- ((((npy_uint64)x)&0xff00000000ULL) >> 8) | \
- ((((npy_uint64)x)&0xff0000000000ULL) >> 24) | \
- ((((npy_uint64)x)&0xff000000000000ULL) >> 40) | \
- (((npy_uint64)x) >> 56))
+#define _NPY_SWAP8(x) npy_bswap8(x)
#define _NPY_SWAP_PAIR8(x) (((((npy_uint64)x)&0xffULL) << 24) | \
((((npy_uint64)x)&0xff00ULL) << 8) | \
@@ -67,21 +56,11 @@
((((npy_uint64)x)&0xff000000000000ULL) >> 8) | \
((((npy_uint64)x)&0xff00000000000000ULL) >> 24))
-#define _NPY_SWAP_INPLACE2(x) { \
- char a = (x)[0]; (x)[0] = (x)[1]; (x)[1] = a; \
- }
+#define _NPY_SWAP_INPLACE2(x) npy_bswap2_unaligned(x)
-#define _NPY_SWAP_INPLACE4(x) { \
- char a = (x)[0]; (x)[0] = (x)[3]; (x)[3] = a; \
- a = (x)[1]; (x)[1] = (x)[2]; (x)[2] = a; \
- }
+#define _NPY_SWAP_INPLACE4(x) npy_bswap4_unaligned(x)
-#define _NPY_SWAP_INPLACE8(x) { \
- char a = (x)[0]; (x)[0] = (x)[7]; (x)[7] = a; \
- a = (x)[1]; (x)[1] = (x)[6]; (x)[6] = a; \
- a = (x)[2]; (x)[2] = (x)[5]; (x)[5] = a; \
- a = (x)[3]; (x)[3] = (x)[4]; (x)[4] = a; \
- }
+#define _NPY_SWAP_INPLACE8(x) npy_bswap8_unaligned(x)
#define _NPY_SWAP_INPLACE16(x) { \
char a = (x)[0]; (x)[0] = (x)[15]; (x)[15] = a; \
diff --git a/numpy/core/src/private/lowlevel_strided_loops.h b/numpy/core/src/private/lowlevel_strided_loops.h
index 742882a92..fffd02e03 100644
--- a/numpy/core/src/private/lowlevel_strided_loops.h
+++ b/numpy/core/src/private/lowlevel_strided_loops.h
@@ -1,5 +1,6 @@
#ifndef __LOWLEVEL_STRIDED_LOOPS_H
#define __LOWLEVEL_STRIDED_LOOPS_H
+#include <npy_config.h>
/*
* NOTE: This API should remain private for the time being, to allow
@@ -396,6 +397,14 @@ PyArray_PrepareThreeRawArrayIter(int ndim, npy_intp *shape,
char **out_dataB, npy_intp *out_stridesB,
char **out_dataC, npy_intp *out_stridesC);
+/*
+ * return true if pointer is aligned to 'alignment'
+ */
+static NPY_INLINE int
+npy_is_aligned(const void * p, const npy_uintp alignment)
+{
+ return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
+}
/*
* Return number of elements that must be peeled from
@@ -441,6 +450,73 @@ npy_blocked_end(const npy_intp offset, const npy_intp esize,
}
+/* byte swapping functions */
+static NPY_INLINE npy_uint16
+npy_bswap2(npy_uint16 x)
+{
+ return ((x & 0xffu) << 8) | (x >> 8);
+}
+
+/*
+ * treat as int16 and byteswap unaligned memory,
+ * some cpus don't support unaligned access
+ */
+static NPY_INLINE void
+npy_bswap2_unaligned(char * x)
+{
+ char a = x[0];
+ x[0] = x[1];
+ x[1] = a;
+}
+
+static NPY_INLINE npy_uint32
+npy_bswap4(npy_uint32 x)
+{
+#ifdef HAVE___BUILTIN_BSWAP32
+ return __builtin_bswap32(x);
+#else
+ return ((x & 0xffu) << 24) | ((x & 0xff00u) << 8) |
+ ((x & 0xff0000u) >> 8) | (x >> 24);
+#endif
+}
+
+static NPY_INLINE void
+npy_bswap4_unaligned(char * x)
+{
+ char a = x[0];
+ x[0] = x[3];
+ x[3] = a;
+ a = x[1];
+ x[1] = x[2];
+ x[2] = a;
+}
+
+static NPY_INLINE npy_uint64
+npy_bswap8(npy_uint64 x)
+{
+#ifdef HAVE___BUILTIN_BSWAP64
+ return __builtin_bswap64(x);
+#else
+ return ((x & 0xffULL) << 56) |
+ ((x & 0xff00ULL) << 40) |
+ ((x & 0xff0000ULL) << 24) |
+ ((x & 0xff000000ULL) << 8) |
+ ((x & 0xff00000000ULL) >> 8) |
+ ((x & 0xff0000000000ULL) >> 24) |
+ ((x & 0xff000000000000ULL) >> 40) |
+ ( x >> 56);
+#endif
+}
+
+static NPY_INLINE void
+npy_bswap8_unaligned(char * x)
+{
+ char a = x[0]; x[0] = x[7]; x[7] = a;
+ a = x[1]; x[1] = x[6]; x[6] = a;
+ a = x[2]; x[2] = x[5]; x[5] = a;
+ a = x[3]; x[3] = x[4]; x[4] = a;
+}
+
/* Start raw iteration */
#define NPY_RAW_ITER_START(idim, ndim, coord, shape) \
diff --git a/numpy/core/src/umath/loops.c.src b/numpy/core/src/umath/loops.c.src
index e307faa46..5eae448ee 100644
--- a/numpy/core/src/umath/loops.c.src
+++ b/numpy/core/src/umath/loops.c.src
@@ -34,11 +34,6 @@
*/
-static NPY_INLINE int npy_is_aligned(const void * p, const npy_intp alignment)
-{
- return ((npy_intp)(p) & ((alignment) - 1)) == 0;
-}
-
#define IS_BINARY_REDUCE ((args[0] == args[2])\
&& (steps[0] == steps[2])\
&& (steps[0] == 0))