diff options
author | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-05-28 20:45:59 +0200 |
---|---|---|
committer | Julian Taylor <jtaylor.debian@googlemail.com> | 2013-05-29 19:58:51 +0200 |
commit | 80befa1cbc96e15cffd98273557bc68bc6fb4860 (patch) | |
tree | c6253bdba81781297367facf70fae532be282fd4 /numpy | |
parent | f52cc7d30b3c0bcaf7bece88e57a88bcec799ce9 (diff) | |
download | numpy-80befa1cbc96e15cffd98273557bc68bc6fb4860.tar.gz |
BUG: check alignment before loading the data
Some CPUs don't support loads that are not aligned to the element size.
Regression introduced in 99cb95f7379a9.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 56 |
1 file changed, 30 insertions, 26 deletions
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index b1a9d9859..5c692bd02 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -311,38 +311,42 @@ _strided_byte_swap(void *p, npy_intp stride, npy_intp n, int size) case 1: /* no byteswap necessary */ break; case 4: - for (a = (char*)p; n > 0; n--, a += stride) { - npy_uint32 * a_ = (npy_uint32 *)a; -#ifdef HAVE___BUILTIN_BSWAP32 - *a_ = __builtin_bswap32(*a_); -#else - /* a decent compiler can convert this to bswap too */ - *a_ = ((*a_ & 0xff000000u) >> 24) | ((*a_ & 0x00ff0000u) >> 8) | - ((*a_ & 0x0000ff00u) << 8) | ((*a_ & 0x000000ffu) << 24); -#endif + if (npy_is_aligned(p, sizeof(npy_uint32))) { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_uint32 * a_ = (npy_uint32 *)a; + *a_ = npy_bswap4(*a_); + } + } + else { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_bswap4_unaligned(a); + } } break; case 8: - for (a = (char*)p; n > 0; n--) { -#ifdef HAVE___BUILTIN_BSWAP64 - npy_uint64 * a_ = (npy_uint64 *)a; - *a_ = __builtin_bswap64(*a_); - a += stride; -#else - /* mask version would be faster but requires C99 */ - b = a + 7; - c = *a; *a++ = *b; *b-- = c; - c = *a; *a++ = *b; *b-- = c; - c = *a; *a++ = *b; *b-- = c; - c = *a; *a = *b; *b = c; - a += stride - 3; -#endif + if (npy_is_aligned(p, sizeof(npy_uint64))) { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_uint64 * a_ = (npy_uint64 *)a; + *a_ = npy_bswap8(*a_); + } + } + else { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_bswap8_unaligned(a); + } } break; case 2: - for (a = (char*)p; n > 0; n--, a += stride) { - npy_uint16 * a_ = (npy_uint16 *)a; - *a_ = (((*a_ >> 8) & 0xffu) | ((*a_ & 0xffu) << 8)); + if (npy_is_aligned(p, sizeof(npy_uint16))) { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_uint16 * a_ = (npy_uint16 *)a; + *a_ = npy_bswap2(*a_); + } + } + else { + for (a = (char*)p; n > 0; n--, a += stride) { + npy_bswap2_unaligned(a); + } } break; 
default: |