author    | Charles Harris <charlesr.harris@gmail.com> | 2018-09-30 12:52:16 -0500
committer | GitHub <noreply@github.com>                | 2018-09-30 12:52:16 -0500
commit    | 4a7926f7085482a5f1f3669715ca20d20f12099a (patch)
tree      | 9c595d88ccd5d242661407772b35449125eb2f00
parent    | 87c1fcd0308ee78e743401bac2b0085249cca1e5 (diff)
parent    | 12bd7c372ab5eeeff8bd2a46d765ccf28c6ce486 (diff)
download  | numpy-4a7926f7085482a5f1f3669715ca20d20f12099a.tar.gz
Merge pull request #6377 from ahaldane/fix_align
BUG: define "uint-alignment", fixes complex64 alignment
21 files changed, 361 insertions, 141 deletions
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index f85ecb300..72bf96295 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -69,6 +69,21 @@ old behavior, use ``np.isnat`` to explicitly check for NaT or convert
 datetime64/timedelta64 arrays with ``.astype(np.int64)`` before making
 comparisons.
 
+complex64/128 alignment has changed
+-----------------------------------
+The memory alignment of complex types is now the same as a C-struct composed of
+two floating point values, while before it was equal to the size of the type.
+For many users (for instance on x64/unix/gcc) this means that complex64 is now
+4-byte aligned instead of 8-byte aligned. An important consequence is that
+aligned structured dtypes may now have a different size. For instance,
+``np.dtype('c8,u1', align=True)`` used to have an itemsize of 16 (on x64/gcc)
+but now it is 12.
+
+In more detail, the complex64 type now has the same alignment as a C-struct
+``struct {float r, i;}``, according to the compiler used to compile numpy, and
+similarly for the complex128 and complex256 types.
+
+
 C API changes
 =============
diff --git a/doc/source/dev/alignment.rst b/doc/source/dev/alignment.rst
new file mode 100644
index 000000000..f067f0d03
--- /dev/null
+++ b/doc/source/dev/alignment.rst
@@ -0,0 +1,96 @@
+.. _alignment:
+
+
+Numpy Alignment Goals
+=====================
+
+There are three use-cases related to memory alignment in numpy (as of 1.14):
+
+ 1. Creating structured datatypes with fields aligned like in a C-struct.
+ 2. Speeding up copy operations by using uint assignment instead of memcpy.
+ 3. Guaranteeing safe aligned access for ufuncs/setitem/casting code.
+
+Numpy uses two different forms of alignment to achieve these goals:
+"True alignment" and "Uint alignment".
+
+"True" alignment refers to the architecture-dependent alignment of an
+equivalent C-type in C. For example, on x64 systems ``numpy.float64`` is
+equivalent to ``double`` in C. On most systems this has either an alignment of
+4 or 8 bytes (and this can be controlled in gcc by the option
+``-malign-double``). A variable is aligned in memory if its memory offset is a
+multiple of its alignment. On some systems (e.g. sparc) memory alignment is
+required; on others it gives a speedup.
+
+"Uint" alignment depends on the size of a datatype. It is defined to be the
+"True alignment" of the uint used by numpy's copy-code to copy the datatype,
+or undefined/unaligned if there is no equivalent uint. Currently numpy uses
+``uint8``, ``uint16``, ``uint32`` and ``uint64`` (the last applied twice for
+16-byte items) to copy data of size 1, 2, 4, 8 and 16 bytes respectively;
+datatypes of any other size cannot be uint-aligned.
+
+For example, on a (typical linux x64 gcc) system, the numpy ``complex64``
+datatype is implemented as ``struct { float real, imag; }``. This has "true"
+alignment of 4 and "uint" alignment of 8 (equal to the true alignment of
+``uint64``).
+
+Variables in Numpy which control and describe alignment
+========================================================
+
+There are four relevant uses of the word ``align`` in numpy:
+
+ * The ``dtype.alignment`` attribute (``descr->alignment`` in C). This is
+   meant to reflect the "true alignment" of the type. It has arch-dependent
+   default values for all datatypes, with the exception of structured types
+   created with ``align=True`` as described below.
+ * The ``ALIGNED`` flag of an ndarray, computed in ``IsAligned`` and checked
+   by ``PyArray_ISALIGNED``. This is computed from ``dtype.alignment``.
+   It is set to ``True`` if every item in the array is at a memory location
+   consistent with ``dtype.alignment``, which is the case if the data ptr and
+   all strides of the array are multiples of that alignment.
+ * The ``align`` keyword of the dtype constructor, which only affects
+   structured arrays. If the structure's field offsets are not manually
+   provided, numpy determines offsets automatically. In that case,
+   ``align=True`` pads the structure so that each field is "true" aligned in
+   memory and sets ``dtype.alignment`` to be the largest of the field "true"
+   alignments. This matches what C-structs usually do. Otherwise, if offsets
+   or itemsize were manually provided, ``align=True`` simply checks that all
+   the fields are "true" aligned and that the total itemsize is a multiple of
+   the largest field alignment. In either case ``dtype.isalignedstruct`` is
+   also set to True.
+ * ``IsUintAligned`` is used to determine if an ndarray is "uint aligned" in
+   an analogous way to how ``IsAligned`` checks for true-alignment.
+
+Consequences of alignment
+=========================
+
+Here is how the variables above are used:
+
+ 1. Creating aligned structs: In order to know how to offset a field when
+    ``align=True``, numpy looks up ``field.dtype.alignment``. This includes
+    fields that are nested structured arrays.
+ 2. Ufuncs: If the ``ALIGNED`` flag of an array is False, ufuncs will
+    buffer/cast the array before evaluation. This is needed since ufunc inner
+    loops access raw elements directly, which might fail on some archs if the
+    elements are not true-aligned.
+ 3. Getitem/setitem/copyswap functions: Similar to ufuncs, these functions
+    generally have two code paths. If ``ALIGNED`` is False they will
+    use a code path that buffers the arguments so they are true-aligned.
+ 4. Strided copy code: Here, "uint alignment" is used instead. If the itemsize
+    of an array is equal to 1, 2, 4, 8 or 16 bytes and the array is uint
+    aligned, then numpy will instead do ``*(uintN*)dst = *(uintN*)src`` for
+    the appropriate N. Otherwise numpy copies by doing ``memcpy(dst, src, N)``.
+ 5. Nditer code: Since this often calls the strided copy code, it must
+    check for "uint alignment".
+ 6. Cast code: If the array is "uint aligned" this will essentially do
+    ``*dst = CASTFUNC(*src)``. If not, it does
+    ``memmove(srcval, src); dstval = CASTFUNC(srcval); memmove(dst, dstval)``
+    where dstval/srcval are aligned.
+
+Note that in principle, only "true alignment" is required for casting code.
+However, because the casting code and copy code are deeply intertwined they
+both use "uint" alignment. This should be safe assuming uint alignment is
+always at least as large as true alignment, though it can cause unnecessary
+buffering if an array is "true aligned" but not "uint aligned". If there is
+ever a big rewrite of this code it would be good to allow them to use
+different alignments.
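The user-visible effect described in the release note above can be checked directly from Python. This is an illustrative sketch, not part of the diff; the values in the comments assume a typical x64/unix/gcc build, as the note does, and will differ on other platforms.

    # Check the release-note example: complex64 "true" alignment now matches
    # struct {float r, i;} rather than the 8-byte itemsize.
    import numpy as np

    print(np.dtype('c8').alignment)   # 4 with numpy >= 1.16 (was 8 before)

    # With the smaller alignment, an aligned struct needs less padding:
    d = np.dtype('c8,u1', align=True)
    print(d.itemsize)                 # 12 with numpy >= 1.16 (was 16 before)
    print(d.alignment)                # 4: the largest field "true" alignment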
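The "true" versus "uint" alignment distinction for complex64 can also be reproduced outside numpy with ctypes. Again a sketch for illustration only: ``Complex64`` here is a hypothetical stand-in for the C-struct named in the docs above, and the printed values assume the same typical linux/x64/gcc platform.

    # "True" alignment is what the compiler gives struct {float r, i;};
    # "uint" alignment is that of the uint64 used to copy 8-byte items.
    import ctypes

    class Complex64(ctypes.Structure):   # stand-in for npy_complex64
        _fields_ = [('real', ctypes.c_float), ('imag', ctypes.c_float)]

    print(ctypes.alignment(Complex64))        # "true" alignment: 4
    print(ctypes.alignment(ctypes.c_uint64))  # "uint" alignment: 8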
+
+
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c
index a48e245d8..ac3fdbef7 100644
--- a/numpy/core/src/common/array_assign.c
+++ b/numpy/core/src/common/array_assign.c
@@ -84,14 +84,43 @@ broadcast_error: {
 /* See array_assign.h for parameter documentation */
 NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
+raw_array_is_aligned(int ndim, npy_intp *shape,
+                     char *data, npy_intp *strides, int alignment)
 {
-    if (alignment > 1) {
-        npy_intp align_check = (npy_intp)data;
-        int idim;
-        for (idim = 0; idim < ndim; ++idim) {
-            align_check |= strides[idim];
+    /*
+     * The code below expects the following:
+     *  * that alignment is a power of two, as required by the C standard.
+     *  * that casting from pointer to uintp gives a sensible representation
+     *    we can use bitwise operations on (perhaps *not* req. by C std,
+     *    but assumed by glibc so it should be fine)
+     *  * that casting stride from intp to uintp (to avoid dependence on the
+     *    signed int representation) preserves remainder wrt alignment, so
+     *    stride%a is the same as ((unsigned intp)stride)%a. Req. by C std.
+     *
+     * The code checks whether the lowest log2(alignment) bits of `data`
+     * and all `strides` are 0, as this implies that
+     * (data + n*stride)%alignment == 0 for all integers n.
+     */
+
+    if (alignment > 1) {
+        npy_uintp align_check = (npy_uintp)data;
+        int i;
+
+        for (i = 0; i < ndim; i++) {
+#if NPY_RELAXED_STRIDES_CHECKING
+            /* skip dim == 1 as it is not required to have stride 0 */
+            if (shape[i] > 1) {
+                /* if shape[i] == 1, the stride is never used */
+                align_check |= (npy_uintp)strides[i];
+            }
+            else if (shape[i] == 0) {
+                /* an array with zero elements is always aligned */
+                return 1;
+            }
+#else /* not NPY_RELAXED_STRIDES_CHECKING */
+            align_check |= (npy_uintp)strides[i];
+#endif /* not NPY_RELAXED_STRIDES_CHECKING */
         }
 
         return npy_is_aligned((void *)align_check, alignment);
@@ -101,6 +130,23 @@ raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
 {
     }
 }
 
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap)
+{
+    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+                                PyArray_DATA(ap), PyArray_STRIDES(ap),
+                                PyArray_DESCR(ap)->alignment);
+}
+
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap)
+{
+    return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+                                PyArray_DATA(ap), PyArray_STRIDES(ap),
+                                npy_uint_alignment(PyArray_DESCR(ap)->elsize));
+}
+
+
 /* Returns 1 if the arrays have overlapping data, 0 otherwise */
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h
index 3fecff007..07438c5e8 100644
--- a/numpy/core/src/common/array_assign.h
+++ b/numpy/core/src/common/array_assign.h
@@ -87,10 +87,26 @@ broadcast_strides(int ndim, npy_intp *shape,
 /*
  * Checks whether a data pointer + set of strides refers to a raw
- * array which is fully aligned data.
+ * array whose elements are all aligned to a given alignment.
+ * alignment should be a power of two.
  */
 NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment);
+raw_array_is_aligned(int ndim, npy_intp *shape,
+                     char *data, npy_intp *strides, int alignment);
+
+/*
+ * Checks if an array is aligned to its "true alignment"
+ * given by dtype->alignment.
+ */
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap);
+
+/*
+ * Checks if an array is aligned to its "uint alignment"
+ * given by npy_uint_alignment(dtype->elsize).
+ */
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap);
 
 /* Returns 1 if the arrays have overlapping data, 0 otherwise */
 NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index f9c671f77..5f139cffb 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -7,7 +7,9 @@
 /*
  * NOTE: This API should remain private for the time being, to allow
  *       for further refinement. I think the 'aligned' mechanism
- *       needs changing, for example.
+ *       needs changing, for example.
+ *
+ *       Note: Updated in 2018 to distinguish "true" from "uint" alignment.
  */
 
 /*
@@ -69,8 +71,9 @@ typedef void (PyArray_StridedBinaryOp)(char *dst, npy_intp dst_stride,
  * strided memory. Returns NULL if there is a problem with the inputs.
  *
  * aligned:
- *      Should be 1 if the src and dst pointers are always aligned,
- *      0 otherwise.
+ *      Should be 1 if the src and dst pointers always point to
+ *      locations at which a uint of equal size to dtype->elsize
+ *      would be aligned, 0 otherwise.
  * src_stride:
  *      Should be the src stride if it will always be the same,
  *      NPY_MAX_INTP otherwise.
@@ -165,8 +168,9 @@ PyArray_GetDTypeCopySwapFn(int aligned,
  * function when the transfer function is no longer required.
  *
  * aligned:
- *      Should be 1 if the src and dst pointers are always aligned,
- *      0 otherwise.
+ *      Should be 1 if the src and dst pointers always point to
+ *      locations at which a uint of equal size to dtype->elsize
+ *      would be aligned, 0 otherwise.
  * src_stride:
  *      Should be the src stride if it will always be the same,
  *      NPY_MAX_INTP otherwise.
diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h
index 8143e7719..673ea1d94 100644
--- a/numpy/core/src/common/npy_config.h
+++ b/numpy/core/src/common/npy_config.h
@@ -6,22 +6,6 @@
 #include "numpy/npy_cpu.h"
 #include "numpy/npy_os.h"
 
-/*
- * largest alignment the copy loops might require
- * required as string, void and complex types might get copied using larger
- * instructions than required to operate on them. E.g. complex float is copied
- * in 8 byte moves but arithmetic on them only loads in 4 byte moves.
- * the sparc platform may need that alignment for long doubles.
- * amd64 is not harmed much by the bloat as the system provides 16 byte
- * alignment by default.
- */
-#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\
-    defined NPY_CPU_ARMEB_AARCH32)
-#define NPY_MAX_COPY_ALIGNMENT 8
-#else
-#define NPY_MAX_COPY_ALIGNMENT 16
-#endif
-
 /* blacklist */
 
 /* Disable broken Sun Workshop Pro math functions */
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index 67c9a333c..6c4d49bd1 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -6,6 +6,7 @@
 #include "numpy/arrayscalars.h"
 #include "numpy/npy_math.h"
 #include "numpy/halffloat.h"
+#include "common.h"
 #include "mem_overlap.h"
 #include "npy_extint128.h"
 #include "common.h"
@@ -1641,6 +1642,42 @@ extint_ceildiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) {
     return pylong_from_int128(c);
 }
 
+struct TestStruct1 {
+    npy_uint8 a;
+    npy_complex64 b;
+};
+
+struct TestStruct2 {
+    npy_uint32 a;
+    npy_complex64 b;
+};
+
+struct TestStruct3 {
+    npy_uint8 a;
+    struct TestStruct1 b;
+};
+
+static PyObject *
+get_struct_alignments(PyObject *NPY_UNUSED(self), PyObject *args) {
+    PyObject *ret = PyTuple_New(3);
+    PyObject *alignment, *size, *val;
+
+/**begin repeat
+ * #N = 1,2,3#
+ */
+    alignment = PyInt_FromLong(_ALIGN(struct TestStruct@N@));
+    size = PyInt_FromLong(sizeof(struct TestStruct@N@));
+    val = PyTuple_Pack(2, alignment, size);
+    Py_DECREF(alignment);
+    Py_DECREF(size);
+    if (val == NULL) {
+        return NULL;
+    }
+    PyTuple_SET_ITEM(ret, @N@-1, val);
+/**end repeat**/
+    return ret;
+}
+
 
 static char get_fpu_mode_doc[] = (
     "get_fpu_mode()\n"
@@ -1956,6 +1993,9 @@ static PyMethodDef Multiarray_TestsMethods[] = {
     {"format_float_OSprintf_g",
         (PyCFunction)printf_float_g,
         METH_VARARGS , NULL},
+    {"get_struct_alignments",
+        get_struct_alignments,
+        METH_VARARGS, NULL},
     {NULL, NULL, 0, NULL}        /* Sentinel */
 };
 
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 74fbb88c2..f692e0307 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -49,10 +49,10 @@ raw_array_assign_array(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim,
-            dst_data, dst_strides, dst_dtype->alignment) &&
-        raw_array_is_aligned(ndim,
-            src_data, src_strides, src_dtype->alignment);
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              raw_array_is_aligned(ndim, shape, src_data, src_strides,
+                                   npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareTwoRawArrayIter(
@@ -134,10 +134,10 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim,
-            dst_data, dst_strides, dst_dtype->alignment) &&
-        raw_array_is_aligned(ndim,
-            src_data, src_strides, src_dtype->alignment);
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              raw_array_is_aligned(ndim, shape, src_data, src_strides,
+                                   npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareThreeRawArrayIter(
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 17de99cb9..841a41850 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -46,11 +46,9 @@ raw_array_assign_scalar(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
-            dst_dtype->alignment);
-    if (!npy_is_aligned(src_data, src_dtype->alignment)) {
-        aligned = 0;
-    }
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareOneRawArrayIter(
@@ -119,11 +117,9 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,
     NPY_BEGIN_THREADS_DEF;
 
     /* Check alignment */
-    aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
-            dst_dtype->alignment);
-    if (!npy_is_aligned(src_data, src_dtype->alignment)) {
-        aligned = 0;
-    }
+    aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+                                   npy_uint_alignment(dst_dtype->elsize)) &&
+              npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
 
     /* Use raw iteration with no heap allocation */
     if (PyArray_PrepareTwoRawArrayIter(
@@ -224,7 +220,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
      * we also skip this if 'dst' has an object dtype.
      */
     if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) ||
-            !npy_is_aligned(src_data, src_dtype->alignment)) &&
+            !npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize))) &&
             PyArray_SIZE(dst) > 1 &&
             !PyDataType_REFCHK(PyArray_DESCR(dst))) {
         char *tmp_src_data;
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index d622effe6..3c735dd7e 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -4322,12 +4322,11 @@ static PyArray_Descr @from@_Descr = {
  *           cfloat, cdouble, clongdouble,
  *           object, datetime, timedelta#
  * #sort = 1*18, 0*1, 1*2#
- * #num = 1*15, 2*3, 1*3#
  * #fromtype = npy_bool,
  *             npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
  *             npy_long, npy_ulong, npy_longlong, npy_ulonglong,
  *             npy_half, npy_float, npy_double, npy_longdouble,
- *             npy_float, npy_double, npy_longdouble,
+ *             npy_cfloat, npy_cdouble, npy_clongdouble,
  *             PyObject *, npy_datetime, npy_timedelta#
  * #NAME = Bool,
  *         Byte, UByte, Short, UShort, Int, UInt,
@@ -4428,10 +4427,9 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
     /* type_num */
     NPY_@from@,
     /* elsize */
-    @num@ * sizeof(@fromtype@),
+    sizeof(@fromtype@),
     /* alignment */
-    @num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ?
-        NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@),
+    _ALIGN(@fromtype@),
     /* subarray */
     NULL,
     /* fields */
@@ -4786,13 +4784,10 @@ set_typeinfo(PyObject *dict)
  *          CFLOAT, CDOUBLE, CLONGDOUBLE#
  * #Name = Half, Float, Double, LongDouble,
  *         CFloat, CDouble, CLongDouble#
- * #num = 1, 1, 1, 1, 2, 2, 2#
  */
         s = PyArray_typeinfo(
             NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@name@,
-            @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ?
-                NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@),
-            &Py@Name@ArrType_Type
+            _ALIGN(@type@), &Py@Name@ArrType_Type
         );
         if (s == NULL) {
             return -1;
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 4f695fdc7..5b4611e8a 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -587,50 +587,6 @@ _zerofill(PyArrayObject *ret)
     return 0;
 }
 
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap)
-{
-    int i;
-    npy_uintp aligned;
-    npy_uintp alignment = PyArray_DESCR(ap)->alignment;
-
-    /* alignment 1 types should have a efficient alignment for copy loops */
-    if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) {
-        npy_intp itemsize = PyArray_ITEMSIZE(ap);
-        /* power of two sizes may be loaded in larger moves */
-        if (((itemsize & (itemsize - 1)) == 0)) {
-            alignment = itemsize > NPY_MAX_COPY_ALIGNMENT ?
-                NPY_MAX_COPY_ALIGNMENT : itemsize;
-        }
-        else {
-            /* if not power of two it will be accessed bytewise */
-            alignment = 1;
-        }
-    }
-
-    if (alignment == 1) {
-        return 1;
-    }
-    aligned = (npy_uintp)PyArray_DATA(ap);
-
-    for (i = 0; i < PyArray_NDIM(ap); i++) {
-#if NPY_RELAXED_STRIDES_CHECKING
-        /* skip dim == 1 as it is not required to have stride 0 */
-        if (PyArray_DIM(ap, i) > 1) {
-            /* if shape[i] == 1, the stride is never used */
-            aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
-        }
-        else if (PyArray_DIM(ap, i) == 0) {
-            /* an array with zero elements is always aligned */
-            return 1;
-        }
-#else /* not NPY_RELAXED_STRIDES_CHECKING */
-        aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
-#endif /* not NPY_RELAXED_STRIDES_CHECKING */
-    }
-    return npy_is_aligned((void *)aligned, alignment);
-}
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap)
 {
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index db0a49920..2b8d3d3a4 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -1,5 +1,6 @@
 #ifndef _NPY_PRIVATE_COMMON_H_
 #define _NPY_PRIVATE_COMMON_H_
+#include "structmember.h"
 #include <numpy/npy_common.h>
 #include <numpy/npy_cpu.h>
 #include <numpy/ndarraytypes.h>
@@ -56,9 +57,6 @@ index2ptr(PyArrayObject *mp, npy_intp i);
 NPY_NO_EXPORT int
 _zerofill(PyArrayObject *ret);
 
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap);
-
 NPY_NO_EXPORT npy_bool
 _IsWriteable(PyArrayObject *ap);
 
@@ -182,6 +180,15 @@ check_and_adjust_axis(int *axis, int ndim)
     return check_and_adjust_axis_msg(axis, ndim, Py_None);
 }
 
+/* used for some alignment checks */
+#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
+/*
+ * Disable harmless compiler warning "4116: unnamed type definition in
+ * parentheses" which is caused by the _ALIGN macro.
+ */
+#if defined(_MSC_VER)
+#pragma warning(disable:4116)
+#endif
 
 /*
  * return true if pointer is aligned to 'alignment'
@@ -190,15 +197,44 @@ static NPY_INLINE int
 npy_is_aligned(const void * p, const npy_uintp alignment)
 {
     /*
-     * alignment is usually a power of two
-     * the test is faster than a direct modulo
+     * Assumes alignment is a power of two, as required by the C standard.
+     * Assumes cast from pointer to uintp gives a sensible representation we
+     * can use bitwise & on (not required by C standard, but used by glibc).
+     * This test is faster than a direct modulo.
     */
-    if (NPY_LIKELY((alignment & (alignment - 1)) == 0)) {
-        return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
-    }
-    else {
-        return ((npy_uintp)(p) % alignment) == 0;
+    return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
+}
+
+/* Get equivalent "uint" alignment given an itemsize, for use in copy code */
+static NPY_INLINE int
+npy_uint_alignment(int itemsize)
+{
+    npy_uintp alignment = 0; /* return value of 0 means unaligned */
+
+    switch(itemsize){
+        case 1:
+            return 1;
+        case 2:
+            alignment = _ALIGN(npy_uint16);
+            break;
+        case 4:
+            alignment = _ALIGN(npy_uint32);
+            break;
+        case 8:
+            alignment = _ALIGN(npy_uint64);
+            break;
+        case 16:
+            /*
+             * 16 byte types are copied using 2 uint64 assignments.
+             * See the strided copy function in lowlevel_strided_loops.c.
+             */
+            alignment = _ALIGN(npy_uint64);
+            break;
+        default:
+            break;
     }
+
+    return alignment;
 }
 
 /*
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index f1b8a0209..aaaaeee82 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2832,7 +2832,7 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
      * contiguous strides, etc.
      */
     if (PyArray_GetDTypeTransferFunction(
-                    PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
+                    IsUintAligned(src) && IsUintAligned(dst),
                     src_stride, dst_stride,
                     PyArray_DESCR(src), PyArray_DESCR(dst),
                     0,
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 2cb1e0a95..97d899ce0 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -2965,6 +2965,10 @@ static void _strided_masked_wrapper_decsrcref_transfer_function(
         dst += subloopsize * dst_stride;
         src += subloopsize * src_stride;
         N -= subloopsize;
+        if (N <= 0) {
+            break;
+        }
+
         /* Process unmasked values */
         mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
                                      &subloopsize, 0);
@@ -3000,6 +3004,10 @@ static void _strided_masked_wrapper_transfer_function(
         dst += subloopsize * dst_stride;
         src += subloopsize * src_stride;
         N -= subloopsize;
+        if (N <= 0) {
+            break;
+        }
+
         /* Process unmasked values */
         mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
                                      &subloopsize, 0);
diff --git a/numpy/core/src/multiarray/flagsobject.c b/numpy/core/src/multiarray/flagsobject.c
index a78bedccb..85ea49fb4 100644
--- a/numpy/core/src/multiarray/flagsobject.c
+++ b/numpy/core/src/multiarray/flagsobject.c
@@ -12,6 +12,7 @@
 #include "npy_config.h"
 
 #include "npy_pycompat.h"
+#include "array_assign.h"
 
 #include "common.h"
 
@@ -64,7 +65,7 @@ PyArray_UpdateFlags(PyArrayObject *ret, int flagmask)
         _UpdateContiguousFlags(ret);
     }
     if (flagmask & NPY_ARRAY_ALIGNED) {
-        if (_IsAligned(ret)) {
+        if (IsAligned(ret)) {
             PyArray_ENABLEFLAGS(ret, NPY_ARRAY_ALIGNED);
         }
         else {
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 925585704..de54ca1b3 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -19,6 +19,7 @@
 #include "arrayobject.h"
 #include "ctors.h"
 #include "lowlevel_strided_loops.h"
+#include "array_assign.h"
 
 #include "item_selection.h"
 #include "npy_sort.h"
@@ -809,7 +810,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
     npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
     npy_intp astride = PyArray_STRIDE(op, axis);
     int swap = PyArray_ISBYTESWAPPED(op);
-    int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+    int needcopy = !IsAligned(op) || swap || astride != elsize;
     int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
 
     PyArray_CopySwapNFunc *copyswapn = PyArray_DESCR(op)->f->copyswapn;
@@ -937,7 +938,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
     npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
     npy_intp astride = PyArray_STRIDE(op, axis);
     int swap = PyArray_ISBYTESWAPPED(op);
-    int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+    int needcopy = !IsAligned(op) || swap || astride != elsize;
     int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
 
     int needidxbuffer;
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index b25b4a8b6..159bb4103 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -10,7 +10,6 @@
 
 #define PY_SSIZE_T_CLEAN
 #include "Python.h"
-#include "structmember.h"
 
 #define NPY_NO_DEPRECATED_API NPY_API_VERSION
 #define _MULTIARRAYMODULE
@@ -19,16 +18,7 @@
 #include <numpy/halffloat.h>
 
 #include "lowlevel_strided_loops.h"
-
-/* used for some alignment checks */
-#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
-/*
- * Disable harmless compiler warning "4116: unnamed type definition in
- * parentheses" which is caused by the _ALIGN macro.
- */
-#if defined(_MSC_VER)
-#pragma warning(disable:4116)
-#endif
+#include "array_assign.h"
 
 
 /*
@@ -1385,7 +1375,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
     npy_intp itersize;
 
-    int is_aligned = PyArray_ISALIGNED(self) && PyArray_ISALIGNED(result);
+    int is_aligned = IsUintAligned(self) && IsUintAligned(result);
     int needs_api = PyDataType_REFCHK(PyArray_DESCR(self));
 
     PyArray_CopySwapFunc *copyswap = PyArray_DESCR(self)->f->copyswap;
@@ -1518,7 +1508,7 @@ mapiter_@name@(PyArrayMapIterObject *mit)
      * could also check extra_op is buffered, but it should rarely matter.
     */
-    is_aligned = PyArray_ISALIGNED(array) && PyArray_ISALIGNED(mit->extra_op);
+    is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op);
 
     if (mit->size == 0) {
         return 0;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index b50866b4d..038c21c92 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -20,6 +20,7 @@
 #include "lowlevel_strided_loops.h"
 #include "item_selection.h"
 #include "mem_overlap.h"
+#include "array_assign.h"
 
 
 #define HAS_INTEGER 1
@@ -1063,7 +1064,7 @@ array_boolean_subscript(PyArrayObject *self,
 
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
-        if (PyArray_GetDTypeTransferFunction(PyArray_ISALIGNED(self),
+        if (PyArray_GetDTypeTransferFunction(IsUintAligned(self),
                         fixed_strides[0], itemsize,
                         dtype, dtype,
                         0,
@@ -1252,7 +1253,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
         /* Get a dtype transfer function */
         NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
         if (PyArray_GetDTypeTransferFunction(
-                        PyArray_ISALIGNED(self) && PyArray_ISALIGNED(v),
+                        IsUintAligned(self) && IsUintAligned(v),
                         v_stride, fixed_strides[0],
                         PyArray_DESCR(v), PyArray_DESCR(self),
                         0,
@@ -1723,7 +1724,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
             /* Check if the type is equivalent to INTP */
             PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
             PyArray_DESCR(ind)->kind == 'i' &&
-            PyArray_ISALIGNED(ind) &&
+            IsUintAligned(ind) &&
             PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
 
         Py_INCREF(PyArray_DESCR(self));
@@ -2086,7 +2087,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
             /* Check if the type is equivalent to INTP */
             PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
             PyArray_DESCR(ind)->kind == 'i' &&
-            PyArray_ISALIGNED(ind) &&
+            IsUintAligned(ind) &&
             PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
 
         /* trivial_set checks the index for us */
@@ -2606,7 +2607,7 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit)
             /* Check if the type is equivalent to INTP */
             PyArray_ITEMSIZE(op) == sizeof(npy_intp) &&
             PyArray_DESCR(op)->kind == 'i' &&
-            PyArray_ISALIGNED(op) &&
+            IsUintAligned(op) &&
             PyDataType_ISNOTSWAPPED(PyArray_DESCR(op))) {
         char *data;
         npy_intp stride;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 6317d6a16..cb63c7f74 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -21,6 +21,7 @@
 #include "conversion_utils.h"
 #include "shape.h"
 #include "strfuncs.h"
+#include "array_assign.h"
 
 #include "methods.h"
 #include "alloc.h"
@@ -1785,11 +1786,11 @@ array_setstate(PyArrayObject *self, PyObject *args)
         fa->data = datastr;
 #ifndef NPY_PY3K
         /* Check that the string is not interned */
-        if (!_IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
+        if (!IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
#else
         /* Bytes should always be considered immutable, but we just grab the
          * pointer if they are large, to save memory.
         */
-        if (!_IsAligned(self) || swap || (len <= 1000)) {
+        if (!IsAligned(self) || swap || (len <= 1000)) {
 #endif
             npy_intp num = PyArray_NBYTES(self);
             fa->data = PyDataMem_NEW(num);
@@ -2281,7 +2282,7 @@ array_setflags(PyArrayObject *self, PyObject *args, PyObject *kwds)
         if (PyObject_Not(align_flag)) {
             PyArray_CLEARFLAGS(self, NPY_ARRAY_ALIGNED);
         }
-        else if (_IsAligned(self)) {
+        else if (IsAligned(self)) {
             PyArray_ENABLEFLAGS(self, NPY_ARRAY_ALIGNED);
         }
         else {
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index c56376f58..dbb24f26b 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -17,8 +17,7 @@
 #include "arrayobject.h"
 #include "templ_common.h"
 
-#include "mem_overlap.h"
-
+#include "array_assign.h"
 
 /* Internal helper functions private to this file */
 static int
@@ -1133,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
     /* Check if the operand is aligned */
     if (op_flags & NPY_ITER_ALIGNED) {
         /* Check alignment */
-        if (!PyArray_ISALIGNED(*op)) {
+        if (!IsUintAligned(*op)) {
             NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
                                 "because of NPY_ITER_ALIGNED\n");
             *op_itflags |= NPY_OP_ITFLAG_CAST;
@@ -2975,7 +2974,7 @@ npyiter_allocate_arrays(NpyIter *iter,
              * If the operand is aligned, any buffering can use aligned
              * optimizations.
              */
-            if (PyArray_ISALIGNED(op[iop])) {
+            if (IsUintAligned(op[iop])) {
                 op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
             }
         }
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 6fbdb7c6c..f22ecdb79 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -7331,7 +7331,6 @@ class TestFormat(object):
             dst = object.__format__(a, '30')
             assert_equal(res, dst)
 
-
 class TestCTypes(object):
 
     def test_ctypes_is_available(self):
@@ -7611,3 +7610,39 @@ def test_npymath_real():
         got = fun(z)
         expected = npfun(z)
         assert_allclose(got, expected)
+
+def test_uintalignment_and_alignment():
+    # alignment code needs to satisfy these requirements:
+    #  1. numpy structs match C struct layout
+    #  2. ufuncs/casting is safe with respect to aligned access
+    #  3. copy code is safe with respect to "uint aligned" access
+    #
+    # Complex types are the main problem, whose alignment may not be the same
+    # as their "uint alignment".
+    #
+    # This test might only fail on certain platforms, where uint64 alignment
+    # is not equal to complex64 alignment. The last two checks will only fail
+    # if numpy is compiled with DEBUG=1.
+
+    d1 = np.dtype('u1,c8', align=True)
+    d2 = np.dtype('u4,c8', align=True)
+    d3 = np.dtype({'names': ['a', 'b'], 'formats': ['u1', d1]}, align=True)
+
+    assert_equal(np.zeros(1, dtype=d1)['f1'].flags['ALIGNED'], True)
+    assert_equal(np.zeros(1, dtype=d2)['f1'].flags['ALIGNED'], True)
+    assert_equal(np.zeros(1, dtype='u1,c8')['f1'].flags['ALIGNED'], False)
+
+    # check that C struct matches numpy struct size
+    s = _multiarray_tests.get_struct_alignments()
+    for d, (alignment, size) in zip([d1,d2,d3], s):
+        assert_equal(d.alignment, alignment)
+        assert_equal(d.itemsize, size)
+
+    # check that ufuncs don't complain in debug mode
+    # (this is probably OK if the aligned flag is true above)
+    src = np.zeros((2,2), dtype=d1)['f1']  # 4-byte aligned, often
+    np.exp(src)  # assert fails?
+
+    # check that copy code doesn't complain in debug mode
+    dst = np.zeros((2,2), dtype='c8')
+    dst[:,1] = src[:,1]  # assert in lowlevel_strided_loops fails?
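
For illustration, the check implemented by raw_array_is_aligned in array_assign.c above can be mirrored in pure Python. This sketch is not part of the diff; it follows the C logic on the NPY_RELAXED_STRIDES_CHECKING path, using the data pointer and strides that numpy exposes to Python.

    import numpy as np

    def raw_array_is_aligned(arr, alignment):
        # OR together the data pointer and every relevant stride: the low
        # log2(alignment) bits are all zero iff data + n*stride is aligned
        # for every in-bounds n.
        if alignment <= 1:
            return True
        check = arr.ctypes.data
        for n, stride in zip(arr.shape, arr.strides):
            if n == 0:
                return True    # arrays with zero elements are always aligned
            if n > 1:          # a length-1 dim's stride is never used
                check |= stride
        return check & (alignment - 1) == 0

    a = np.zeros(4, dtype=np.complex64)
    print(raw_array_is_aligned(a, a.dtype.alignment))  # "true" alignment
    print(raw_array_is_aligned(a, 8))                  # 8-byte "uint" alignment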
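Similarly, the itemsize-to-alignment mapping of npy_uint_alignment in common.h above can be sketched in Python, with ctypes standing in for the _ALIGN macro. Also not part of the diff; as in the C code, a result of 0 means no uint copy path applies and the copy loops fall back to memcpy.

    import ctypes

    def uint_alignment(itemsize):
        table = {
            1: 1,
            2: ctypes.alignment(ctypes.c_uint16),
            4: ctypes.alignment(ctypes.c_uint32),
            8: ctypes.alignment(ctypes.c_uint64),
            # 16-byte items are copied as two uint64 assignments
            16: ctypes.alignment(ctypes.c_uint64),
        }
        return table.get(itemsize, 0)  # 0 means "unaligned"/no uint path

    print(uint_alignment(8))    # typically 8
    print(uint_alignment(12))   # 0: memcpy is used instead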