summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Taylor <jtaylor.debian@googlemail.com>2013-05-28 20:45:59 +0200
committerJulian Taylor <jtaylor.debian@googlemail.com>2013-05-29 19:58:51 +0200
commit80befa1cbc96e15cffd98273557bc68bc6fb4860 (patch)
treec6253bdba81781297367facf70fae532be282fd4
parentf52cc7d30b3c0bcaf7bece88e57a88bcec799ce9 (diff)
downloadnumpy-80befa1cbc96e15cffd98273557bc68bc6fb4860.tar.gz
BUG: check alignment before loading the data
Some CPUs don't support loads that are not aligned to the element size. This is a regression introduced in 99cb95f7379a9.
-rw-r--r--numpy/core/src/multiarray/ctors.c56
1 file changed, 30 insertions, 26 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index b1a9d9859..5c692bd02 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -311,38 +311,42 @@ _strided_byte_swap(void *p, npy_intp stride, npy_intp n, int size)
case 1: /* no byteswap necessary */
break;
case 4:
- for (a = (char*)p; n > 0; n--, a += stride) {
- npy_uint32 * a_ = (npy_uint32 *)a;
-#ifdef HAVE___BUILTIN_BSWAP32
- *a_ = __builtin_bswap32(*a_);
-#else
- /* a decent compiler can convert this to bswap too */
- *a_ = ((*a_ & 0xff000000u) >> 24) | ((*a_ & 0x00ff0000u) >> 8) |
- ((*a_ & 0x0000ff00u) << 8) | ((*a_ & 0x000000ffu) << 24);
-#endif
+ if (npy_is_aligned(p, sizeof(npy_uint32))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint32 * a_ = (npy_uint32 *)a;
+ *a_ = npy_bswap4(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap4_unaligned(a);
+ }
}
break;
case 8:
- for (a = (char*)p; n > 0; n--) {
-#ifdef HAVE___BUILTIN_BSWAP64
- npy_uint64 * a_ = (npy_uint64 *)a;
- *a_ = __builtin_bswap64(*a_);
- a += stride;
-#else
- /* mask version would be faster but requires C99 */
- b = a + 7;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a++ = *b; *b-- = c;
- c = *a; *a = *b; *b = c;
- a += stride - 3;
-#endif
+ if (npy_is_aligned(p, sizeof(npy_uint64))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint64 * a_ = (npy_uint64 *)a;
+ *a_ = npy_bswap8(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap8_unaligned(a);
+ }
}
break;
case 2:
- for (a = (char*)p; n > 0; n--, a += stride) {
- npy_uint16 * a_ = (npy_uint16 *)a;
- *a_ = (((*a_ >> 8) & 0xffu) | ((*a_ & 0xffu) << 8));
+ if (npy_is_aligned(p, sizeof(npy_uint16))) {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_uint16 * a_ = (npy_uint16 *)a;
+ *a_ = npy_bswap2(*a_);
+ }
+ }
+ else {
+ for (a = (char*)p; n > 0; n--, a += stride) {
+ npy_bswap2_unaligned(a);
+ }
}
break;
default: