diff options
-rw-r--r-- | numpy/core/src/multiarray/lowlevel_strided_loops.c.src | 9 |
1 files changed, 7 insertions, 2 deletions
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index b0770168f..0adb24c8d 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -21,10 +21,15 @@
 #include "lowlevel_strided_loops.h"
 
 /*
- * x86 platform works with unaligned access
+ * x86 platform works with unaligned access but the compiler is allowed to
+ * assume all data is aligned to its size by the C standard. This means it can
+ * vectorize instructions peeling only by the size of the type, if the data is
+ * not aligned to this size one ends up with data not correctly aligned for SSE
+ * instructions (16 byte).
+ * So this flag can only be enabled if autovectorization is disabled.
  */
 #if (defined(NPY_CPU_X86) || defined(NPY_CPU_AMD64))
-# define NPY_USE_UNALIGNED_ACCESS 1
+# define NPY_USE_UNALIGNED_ACCESS 0
 #else
 # define NPY_USE_UNALIGNED_ACCESS 0
 #endif