summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorCharles Harris <charlesr.harris@gmail.com>2018-09-30 12:52:16 -0500
committerGitHub <noreply@github.com>2018-09-30 12:52:16 -0500
commit4a7926f7085482a5f1f3669715ca20d20f12099a (patch)
tree9c595d88ccd5d242661407772b35449125eb2f00
parent87c1fcd0308ee78e743401bac2b0085249cca1e5 (diff)
parent12bd7c372ab5eeeff8bd2a46d765ccf28c6ce486 (diff)
downloadnumpy-4a7926f7085482a5f1f3669715ca20d20f12099a.tar.gz
Merge pull request #6377 from ahaldane/fix_align
BUG: define "uint-alignment", fixes complex64 alignment
-rw-r--r--doc/release/1.16.0-notes.rst15
-rw-r--r--doc/source/dev/alignment.rst96
-rw-r--r--numpy/core/src/common/array_assign.c58
-rw-r--r--numpy/core/src/common/array_assign.h20
-rw-r--r--numpy/core/src/common/lowlevel_strided_loops.h14
-rw-r--r--numpy/core/src/common/npy_config.h16
-rw-r--r--numpy/core/src/multiarray/_multiarray_tests.c.src40
-rw-r--r--numpy/core/src/multiarray/array_assign_array.c16
-rw-r--r--numpy/core/src/multiarray/array_assign_scalar.c18
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src13
-rw-r--r--numpy/core/src/multiarray/common.c44
-rw-r--r--numpy/core/src/multiarray/common.h56
-rw-r--r--numpy/core/src/multiarray/ctors.c2
-rw-r--r--numpy/core/src/multiarray/dtype_transfer.c8
-rw-r--r--numpy/core/src/multiarray/flagsobject.c3
-rw-r--r--numpy/core/src/multiarray/item_selection.c5
-rw-r--r--numpy/core/src/multiarray/lowlevel_strided_loops.c.src16
-rw-r--r--numpy/core/src/multiarray/mapping.c11
-rw-r--r--numpy/core/src/multiarray/methods.c7
-rw-r--r--numpy/core/src/multiarray/nditer_constr.c7
-rw-r--r--numpy/core/tests/test_multiarray.py37
21 files changed, 361 insertions, 141 deletions
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index f85ecb300..72bf96295 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -69,6 +69,21 @@ old behavior, use ``np.isnat`` to explicitly check for NaT or convert
datetime64/timedelta64 arrays with ``.astype(np.int64)`` before making
comparisons.
+complex64/128 alignment has changed
+-----------------------------------
+The memory alignment of complex types is now the same as a C-struct composed of
+two floating point values, while before it was equal to the size of the type.
+For many users (for instance on x64/unix/gcc) this means that complex64 is now
+4-byte aligned instead of 8-byte aligned. An important consequence is that
+aligned structured dtypes may now have a different size. For instance,
+``np.dtype('c8,u1', align=True)`` used to have an itemsize of 16 (on x64/gcc)
+but now it is 12.
+
+In more detail, the complex64 type now has the same alignment as a C-struct
+``struct {float r, i;}``, according to the compiler used to compile numpy, and
+similarly for the complex128 and complex256 types.
+
+
C API changes
=============
diff --git a/doc/source/dev/alignment.rst b/doc/source/dev/alignment.rst
new file mode 100644
index 000000000..f067f0d03
--- /dev/null
+++ b/doc/source/dev/alignment.rst
@@ -0,0 +1,96 @@
+.. _alignment:
+
+
+Numpy Alignment Goals
+=====================
+
+There are three use-cases related to memory alignment in numpy (as of 1.14):
+
+ 1. Creating structured datatypes with fields aligned like in a C-struct.
+ 2. Speeding up copy operations by using uint assignment in instead of memcpy
+ 3. Guaranteeing safe aligned access for ufuncs/setitem/casting code
+
+Numpy uses two different forms of alignment to achieve these goals:
+"True alignment" and "Uint alignment".
+
+"True" alignment refers to the architecture-dependent alignment of an
+equivalent C-type in C. For example, in x64 systems ``numpy.float64`` is
+equivalent to ``double`` in C. On most systems this has either an alignment of
+4 or 8 bytes (and this can be controlled in gcc by the option
+``-malign-double``). A variable is aligned in memory if its memory offset is a
+multiple of its alignment. On some systems (e.g. sparc) memory alignment is
+required, on others it gives a speedup.
+
+"Uint" alignment depends on the size of a datatype. It is defined to be the
+"True alignment" of the uint used by numpy's copy-code to copy the datatype, or
+undefined/unaligned if there is no equivalent uint. Currently numpy uses uint8,
+uint16, uint32, uint64 and two uint64s to copy data of size 1, 2, 4, 8 and 16
+bytes respectively, and all other sized datatypes cannot be uint-aligned.
+
+For example, on a (typical linux x64 gcc) system, the numpy ``complex64``
+datatype is implemented as ``struct { float real, imag; }``. This has "true"
+alignment of 4 and "uint" alignment of 8 (equal to the true alignment of
+``uint64``).
+
+Variables in Numpy which control and describe alignment
+=======================================================
+
+There are 4 relevant uses of the word ``align`` in numpy:
+
+ * The ``dtype.alignment`` attribute (``descr->alignment`` in C). This is meant
+ to reflect the "true alignment" of the type. It has arch-dependent default
+ values for all datatypes, with the exception of structured types created
+ with ``align=True`` as described below.
+ * The ``ALIGNED`` flag of an ndarray, computed in ``IsAligned`` and checked
+ by ``PyArray_ISALIGNED``. This is computed from ``dtype.alignment``.
+ It is set to ``True`` if every item in the array is at a memory location
+ consistent with ``dtype.alignment``, which is the case if the data ptr and
+ all strides of the array are multiples of that alignment.
+ * The ``align`` keyword of the dtype constructor, which only affects structured
+ arrays. If the structure's field offsets are not manually provided numpy
+ determines offsets automatically. In that case, ``align=True`` pads the
+ structure so that each field is "true" aligned in memory and sets
+ ``dtype.alignment`` to be the largest of the field "true" alignments. This
+ is like what C-structs usually do. Otherwise if offsets or itemsize were
+ manually provided ``align=True`` simply checks that all the fields are
+ "true" aligned and that the total itemsize is a multiple of the largest
+ field alignment. In either case ``dtype.isalignedstruct`` is also set to
+ True.
+ * ``IsUintAligned`` is used to determine if an ndarray is "uint aligned" in
+ an analogous way to how ``IsAligned`` checks for true-alignment.
+
+Consequences of alignment
+=========================
+
+Here is how the variables above are used:
+
+ 1. Creating aligned structs: In order to know how to offset a field when
+ ``align=True``, numpy looks up ``field.dtype.alignment``. This includes
+ fields which are nested structured arrays.
+ 2. Ufuncs: If the ``ALIGNED`` flag of an array is False, ufuncs will
+ buffer/cast the array before evaluation. This is needed since ufunc inner
+ loops access raw elements directly, which might fail on some archs if the
+ elements are not true-aligned.
+ 3. Getitem/setitem/copyswap function: Similar to ufuncs, these functions
+ generally have two code paths. If ``ALIGNED`` is False they will
+ use a code path that buffers the arguments so they are true-aligned.
+ 4. Strided copy code: Here, "uint alignment" is used instead. If the itemsize
+ of an array is equal to 1, 2, 4, 8 or 16 bytes and the array is uint
+ aligned then numpy will instead do ``*((uintN*)dst) = *((uintN*)src)`` for
+ appropriate N. Otherwise numpy copies by doing ``memcpy(dst, src, N)``.
+ 5. Nditer code: Since this often calls the strided copy code, it must
+ check for "uint alignment".
+ 6. Cast code: if the array is "uint aligned" this will essentially do
+ ``*dst = CASTFUNC(*src)``. If not, it does
+ ``memmove(srcval, src); dstval = CASTFUNC(srcval); memmove(dst, dstval)``
+ where dstval/srcval are aligned.
+
+Note that in principle, only "true alignment" is required for casting code.
+However, because the casting code and copy code are deeply intertwined they
+both use "uint" alignment. This should be safe assuming uint alignment is
+always larger than true alignment, though it can cause unnecessary buffering if
+an array is "true aligned" but not "uint aligned". If there is ever a big
+rewrite of this code it would be good to allow them to use different
+alignments.
+
+
diff --git a/numpy/core/src/common/array_assign.c b/numpy/core/src/common/array_assign.c
index a48e245d8..ac3fdbef7 100644
--- a/numpy/core/src/common/array_assign.c
+++ b/numpy/core/src/common/array_assign.c
@@ -84,14 +84,43 @@ broadcast_error: {
/* See array_assign.h for parameter documentation */
NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
+raw_array_is_aligned(int ndim, npy_intp *shape,
+ char *data, npy_intp *strides, int alignment)
{
- if (alignment > 1) {
- npy_intp align_check = (npy_intp)data;
- int idim;
- for (idim = 0; idim < ndim; ++idim) {
- align_check |= strides[idim];
+ /*
+ * The code below expects the following:
+ * * that alignment is a power of two, as required by the C standard.
+ * * that casting from pointer to uintp gives a sensible representation
+ * we can use bitwise operations on (perhaps *not* req. by C std,
+ * but assumed by glibc so it should be fine)
+ * * that casting stride from intp to uintp (to avoid dependence on the
+ * signed int representation) preserves remainder wrt alignment, so
+ * stride%a is the same as ((unsigned intp)stride)%a. Req. by C std.
+ *
+ * The code checks whether the lowest log2(alignment) bits of `data`
+ * and all `strides` are 0, as this implies that
+ * (data + n*stride)%alignment == 0 for all integers n.
+ */
+
+ if (alignment > 1) {
+ npy_uintp align_check = (npy_uintp)data;
+ int i;
+
+ for (i = 0; i < ndim; i++) {
+#if NPY_RELAXED_STRIDES_CHECKING
+ /* skip dim == 1 as it is not required to have stride 0 */
+ if (shape[i] > 1) {
+ /* if shape[i] == 1, the stride is never used */
+ align_check |= (npy_uintp)strides[i];
+ }
+ else if (shape[i] == 0) {
+ /* an array with zero elements is always aligned */
+ return 1;
+ }
+#else /* not NPY_RELAXED_STRIDES_CHECKING */
+ align_check |= (npy_uintp)strides[i];
+#endif /* not NPY_RELAXED_STRIDES_CHECKING */
}
return npy_is_aligned((void *)align_check, alignment);
@@ -101,6 +130,23 @@ raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment)
}
}
+/*
+ * Check an array against its dtype's "true" alignment: forwards the array's
+ * ndim, dimensions, data pointer and strides to raw_array_is_aligned()
+ * together with PyArray_DESCR(ap)->alignment, and returns that result.
+ */
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap)
+{
+ return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+ PyArray_DATA(ap), PyArray_STRIDES(ap),
+ PyArray_DESCR(ap)->alignment);
+}
+
+/*
+ * Check an array against its "uint" alignment: forwards the array's ndim,
+ * dimensions, data pointer and strides to raw_array_is_aligned() together
+ * with npy_uint_alignment(PyArray_DESCR(ap)->elsize), and returns that
+ * result.
+ */
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap)
+{
+ return raw_array_is_aligned(PyArray_NDIM(ap), PyArray_DIMS(ap),
+ PyArray_DATA(ap), PyArray_STRIDES(ap),
+ npy_uint_alignment(PyArray_DESCR(ap)->elsize));
+}
+
+
/* Returns 1 if the arrays have overlapping data, 0 otherwise */
NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/array_assign.h b/numpy/core/src/common/array_assign.h
index 3fecff007..07438c5e8 100644
--- a/numpy/core/src/common/array_assign.h
+++ b/numpy/core/src/common/array_assign.h
@@ -87,10 +87,26 @@ broadcast_strides(int ndim, npy_intp *shape,
/*
* Checks whether a data pointer + set of strides refers to a raw
- * array which is fully aligned data.
+ * array whose elements are all aligned to a given alignment.
+ * alignment should be a power of two.
*/
NPY_NO_EXPORT int
-raw_array_is_aligned(int ndim, char *data, npy_intp *strides, int alignment);
+raw_array_is_aligned(int ndim, npy_intp *shape,
+ char *data, npy_intp *strides, int alignment);
+
+/*
+ * Checks if an array is aligned to its "true alignment"
+ * given by dtype->alignment.
+ */
+NPY_NO_EXPORT int
+IsAligned(PyArrayObject *ap);
+
+/*
+ * Checks if an array is aligned to its "uint alignment"
+ * given by npy_uint_alignment(dtype->elsize).
+ */
+NPY_NO_EXPORT int
+IsUintAligned(PyArrayObject *ap);
/* Returns 1 if the arrays have overlapping data, 0 otherwise */
NPY_NO_EXPORT int
diff --git a/numpy/core/src/common/lowlevel_strided_loops.h b/numpy/core/src/common/lowlevel_strided_loops.h
index f9c671f77..5f139cffb 100644
--- a/numpy/core/src/common/lowlevel_strided_loops.h
+++ b/numpy/core/src/common/lowlevel_strided_loops.h
@@ -7,7 +7,9 @@
/*
* NOTE: This API should remain private for the time being, to allow
* for further refinement. I think the 'aligned' mechanism
- * needs changing, for example.
+ * needs changing, for example.
+ *
+ * Note: Updated in 2018 to distinguish "true" from "uint" alignment.
*/
/*
@@ -69,8 +71,9 @@ typedef void (PyArray_StridedBinaryOp)(char *dst, npy_intp dst_stride,
* strided memory. Returns NULL if there is a problem with the inputs.
*
* aligned:
- * Should be 1 if the src and dst pointers are always aligned,
- * 0 otherwise.
+ * Should be 1 if the src and dst pointers always point to
+ * locations at which a uint of equal size to dtype->elsize
+ * would be aligned, 0 otherwise.
* src_stride:
* Should be the src stride if it will always be the same,
* NPY_MAX_INTP otherwise.
@@ -165,8 +168,9 @@ PyArray_GetDTypeCopySwapFn(int aligned,
* function when the transfer function is no longer required.
*
* aligned:
- * Should be 1 if the src and dst pointers are always aligned,
- * 0 otherwise.
+ * Should be 1 if the src and dst pointers always point to
+ * locations at which a uint of equal size to dtype->elsize
+ * would be aligned, 0 otherwise.
* src_stride:
* Should be the src stride if it will always be the same,
* NPY_MAX_INTP otherwise.
diff --git a/numpy/core/src/common/npy_config.h b/numpy/core/src/common/npy_config.h
index 8143e7719..673ea1d94 100644
--- a/numpy/core/src/common/npy_config.h
+++ b/numpy/core/src/common/npy_config.h
@@ -6,22 +6,6 @@
#include "numpy/npy_cpu.h"
#include "numpy/npy_os.h"
-/*
- * largest alignment the copy loops might require
- * required as string, void and complex types might get copied using larger
- * instructions than required to operate on them. E.g. complex float is copied
- * in 8 byte moves but arithmetic on them only loads in 4 byte moves.
- * the sparc platform may need that alignment for long doubles.
- * amd64 is not harmed much by the bloat as the system provides 16 byte
- * alignment by default.
- */
-#if (defined NPY_CPU_X86 || defined _WIN32 || defined NPY_CPU_ARMEL_AARCH32 ||\
- defined NPY_CPU_ARMEB_AARCH32)
-#define NPY_MAX_COPY_ALIGNMENT 8
-#else
-#define NPY_MAX_COPY_ALIGNMENT 16
-#endif
-
/* blacklist */
/* Disable broken Sun Workshop Pro math functions */
diff --git a/numpy/core/src/multiarray/_multiarray_tests.c.src b/numpy/core/src/multiarray/_multiarray_tests.c.src
index 67c9a333c..6c4d49bd1 100644
--- a/numpy/core/src/multiarray/_multiarray_tests.c.src
+++ b/numpy/core/src/multiarray/_multiarray_tests.c.src
@@ -6,6 +6,7 @@
#include "numpy/arrayscalars.h"
#include "numpy/npy_math.h"
#include "numpy/halffloat.h"
+#include "common.h"
#include "mem_overlap.h"
#include "npy_extint128.h"
#include "common.h"
@@ -1641,6 +1642,42 @@ extint_ceildiv_128_64(PyObject *NPY_UNUSED(self), PyObject *args) {
return pylong_from_int128(c);
}
+/*
+ * C structs whose alignment and size are reported by
+ * get_struct_alignments(), so that the Python tests can compare them with
+ * the equivalent aligned structured dtypes.
+ */
+/* compared with np.dtype('u1,c8', align=True) */
+struct TestStruct1 {
+ npy_uint8 a;
+ npy_complex64 b;
+};
+
+/* compared with np.dtype('u4,c8', align=True) */
+struct TestStruct2 {
+ npy_uint32 a;
+ npy_complex64 b;
+};
+
+/* compared with an aligned dtype holding a 'u1' field and a nested TestStruct1 */
+struct TestStruct3 {
+ npy_uint8 a;
+ struct TestStruct1 b;
+};
+
+/*
+ * Return a 3-tuple of (alignment, size) pairs, one for each of
+ * TestStruct1, TestStruct2 and TestStruct3, as laid out by the C compiler.
+ *
+ * Returns a new reference, or NULL with an exception set if any allocation
+ * fails. No reference is leaked on the error paths.
+ */
+static PyObject *
+get_struct_alignments(PyObject *NPY_UNUSED(self), PyObject *args) {
+    PyObject *ret = PyTuple_New(3);
+    PyObject *alignment, *size, *val;
+
+    if (ret == NULL) {
+        return NULL;
+    }
+
+/**begin repeat
+ * #N = 1,2,3#
+ */
+    alignment = PyInt_FromLong(_ALIGN(struct TestStruct@N@));
+    size = PyInt_FromLong(sizeof(struct TestStruct@N@));
+    if (alignment == NULL || size == NULL) {
+        /* PyTuple_Pack must not be handed NULL items */
+        Py_XDECREF(alignment);
+        Py_XDECREF(size);
+        Py_DECREF(ret);
+        return NULL;
+    }
+    /* PyTuple_Pack takes its own references, so ours are dropped below */
+    val = PyTuple_Pack(2, alignment, size);
+    Py_DECREF(alignment);
+    Py_DECREF(size);
+    if (val == NULL) {
+        Py_DECREF(ret);
+        return NULL;
+    }
+    /* PyTuple_SET_ITEM steals the reference to val */
+    PyTuple_SET_ITEM(ret, @N@-1, val);
+/**end repeat**/
+    return ret;
+}
+
static char get_fpu_mode_doc[] = (
"get_fpu_mode()\n"
@@ -1956,6 +1993,9 @@ static PyMethodDef Multiarray_TestsMethods[] = {
{"format_float_OSprintf_g",
(PyCFunction)printf_float_g,
METH_VARARGS , NULL},
+ {"get_struct_alignments",
+ get_struct_alignments,
+ METH_VARARGS, NULL},
{NULL, NULL, 0, NULL} /* Sentinel */
};
diff --git a/numpy/core/src/multiarray/array_assign_array.c b/numpy/core/src/multiarray/array_assign_array.c
index 74fbb88c2..f692e0307 100644
--- a/numpy/core/src/multiarray/array_assign_array.c
+++ b/numpy/core/src/multiarray/array_assign_array.c
@@ -49,10 +49,10 @@ raw_array_assign_array(int ndim, npy_intp *shape,
NPY_BEGIN_THREADS_DEF;
/* Check alignment */
- aligned = raw_array_is_aligned(ndim,
- dst_data, dst_strides, dst_dtype->alignment) &&
- raw_array_is_aligned(ndim,
- src_data, src_strides, src_dtype->alignment);
+ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+ npy_uint_alignment(dst_dtype->elsize)) &&
+ raw_array_is_aligned(ndim, shape, src_data, src_strides,
+ npy_uint_alignment(src_dtype->elsize));
/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
@@ -134,10 +134,10 @@ raw_array_wheremasked_assign_array(int ndim, npy_intp *shape,
NPY_BEGIN_THREADS_DEF;
/* Check alignment */
- aligned = raw_array_is_aligned(ndim,
- dst_data, dst_strides, dst_dtype->alignment) &&
- raw_array_is_aligned(ndim,
- src_data, src_strides, src_dtype->alignment);
+ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+ npy_uint_alignment(dst_dtype->elsize)) &&
+ raw_array_is_aligned(ndim, shape, src_data, src_strides,
+ npy_uint_alignment(src_dtype->elsize));
/* Use raw iteration with no heap allocation */
if (PyArray_PrepareThreeRawArrayIter(
diff --git a/numpy/core/src/multiarray/array_assign_scalar.c b/numpy/core/src/multiarray/array_assign_scalar.c
index 17de99cb9..841a41850 100644
--- a/numpy/core/src/multiarray/array_assign_scalar.c
+++ b/numpy/core/src/multiarray/array_assign_scalar.c
@@ -46,11 +46,9 @@ raw_array_assign_scalar(int ndim, npy_intp *shape,
NPY_BEGIN_THREADS_DEF;
/* Check alignment */
- aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
- dst_dtype->alignment);
- if (!npy_is_aligned(src_data, src_dtype->alignment)) {
- aligned = 0;
- }
+ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+ npy_uint_alignment(dst_dtype->elsize)) &&
+ npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
/* Use raw iteration with no heap allocation */
if (PyArray_PrepareOneRawArrayIter(
@@ -119,11 +117,9 @@ raw_array_wheremasked_assign_scalar(int ndim, npy_intp *shape,
NPY_BEGIN_THREADS_DEF;
/* Check alignment */
- aligned = raw_array_is_aligned(ndim, dst_data, dst_strides,
- dst_dtype->alignment);
- if (!npy_is_aligned(src_data, src_dtype->alignment)) {
- aligned = 0;
- }
+ aligned = raw_array_is_aligned(ndim, shape, dst_data, dst_strides,
+ npy_uint_alignment(dst_dtype->elsize)) &&
+ npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize));
/* Use raw iteration with no heap allocation */
if (PyArray_PrepareTwoRawArrayIter(
@@ -224,7 +220,7 @@ PyArray_AssignRawScalar(PyArrayObject *dst,
* we also skip this if 'dst' has an object dtype.
*/
if ((!PyArray_EquivTypes(PyArray_DESCR(dst), src_dtype) ||
- !npy_is_aligned(src_data, src_dtype->alignment)) &&
+ !npy_is_aligned(src_data, npy_uint_alignment(src_dtype->elsize))) &&
PyArray_SIZE(dst) > 1 &&
!PyDataType_REFCHK(PyArray_DESCR(dst))) {
char *tmp_src_data;
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index d622effe6..3c735dd7e 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -4322,12 +4322,11 @@ static PyArray_Descr @from@_Descr = {
* cfloat, cdouble, clongdouble,
* object, datetime, timedelta#
* #sort = 1*18, 0*1, 1*2#
- * #num = 1*15, 2*3, 1*3#
* #fromtype = npy_bool,
* npy_byte, npy_ubyte, npy_short, npy_ushort, npy_int, npy_uint,
* npy_long, npy_ulong, npy_longlong, npy_ulonglong,
* npy_half, npy_float, npy_double, npy_longdouble,
- * npy_float, npy_double, npy_longdouble,
+ * npy_cfloat, npy_cdouble, npy_clongdouble,
* PyObject *, npy_datetime, npy_timedelta#
* #NAME = Bool,
* Byte, UByte, Short, UShort, Int, UInt,
@@ -4428,10 +4427,9 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
/* type_num */
NPY_@from@,
/* elsize */
- @num@ * sizeof(@fromtype@),
+ sizeof(@fromtype@),
/* alignment */
- @num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ?
- NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@),
+ _ALIGN(@fromtype@),
/* subarray */
NULL,
/* fields */
@@ -4786,13 +4784,10 @@ set_typeinfo(PyObject *dict)
* CFLOAT, CDOUBLE, CLONGDOUBLE#
* #Name = Half, Float, Double, LongDouble,
* CFloat, CDouble, CLongDouble#
- * #num = 1, 1, 1, 1, 2, 2, 2#
*/
s = PyArray_typeinfo(
NPY_@name@LTR, NPY_@name@, NPY_BITSOF_@name@,
- @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ?
- NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@),
- &Py@Name@ArrType_Type
+ _ALIGN(@type@), &Py@Name@ArrType_Type
);
if (s == NULL) {
return -1;
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index 4f695fdc7..5b4611e8a 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -587,50 +587,6 @@ _zerofill(PyArrayObject *ret)
return 0;
}
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap)
-{
- int i;
- npy_uintp aligned;
- npy_uintp alignment = PyArray_DESCR(ap)->alignment;
-
- /* alignment 1 types should have a efficient alignment for copy loops */
- if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) {
- npy_intp itemsize = PyArray_ITEMSIZE(ap);
- /* power of two sizes may be loaded in larger moves */
- if (((itemsize & (itemsize - 1)) == 0)) {
- alignment = itemsize > NPY_MAX_COPY_ALIGNMENT ?
- NPY_MAX_COPY_ALIGNMENT : itemsize;
- }
- else {
- /* if not power of two it will be accessed bytewise */
- alignment = 1;
- }
- }
-
- if (alignment == 1) {
- return 1;
- }
- aligned = (npy_uintp)PyArray_DATA(ap);
-
- for (i = 0; i < PyArray_NDIM(ap); i++) {
-#if NPY_RELAXED_STRIDES_CHECKING
- /* skip dim == 1 as it is not required to have stride 0 */
- if (PyArray_DIM(ap, i) > 1) {
- /* if shape[i] == 1, the stride is never used */
- aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
- }
- else if (PyArray_DIM(ap, i) == 0) {
- /* an array with zero elements is always aligned */
- return 1;
- }
-#else /* not NPY_RELAXED_STRIDES_CHECKING */
- aligned |= (npy_uintp)PyArray_STRIDES(ap)[i];
-#endif /* not NPY_RELAXED_STRIDES_CHECKING */
- }
- return npy_is_aligned((void *)aligned, alignment);
-}
-
NPY_NO_EXPORT npy_bool
_IsWriteable(PyArrayObject *ap)
{
diff --git a/numpy/core/src/multiarray/common.h b/numpy/core/src/multiarray/common.h
index db0a49920..2b8d3d3a4 100644
--- a/numpy/core/src/multiarray/common.h
+++ b/numpy/core/src/multiarray/common.h
@@ -1,5 +1,6 @@
#ifndef _NPY_PRIVATE_COMMON_H_
#define _NPY_PRIVATE_COMMON_H_
+#include "structmember.h"
#include <numpy/npy_common.h>
#include <numpy/npy_cpu.h>
#include <numpy/ndarraytypes.h>
@@ -56,9 +57,6 @@ index2ptr(PyArrayObject *mp, npy_intp i);
NPY_NO_EXPORT int
_zerofill(PyArrayObject *ret);
-NPY_NO_EXPORT int
-_IsAligned(PyArrayObject *ap);
-
NPY_NO_EXPORT npy_bool
_IsWriteable(PyArrayObject *ap);
@@ -182,6 +180,15 @@ check_and_adjust_axis(int *axis, int ndim)
return check_and_adjust_axis_msg(axis, ndim, Py_None);
}
+/* used for some alignment checks */
+#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
+/*
+ * Disable harmless compiler warning "4116: unnamed type definition in
+ * parentheses" which is caused by the _ALIGN macro.
+ */
+#if defined(_MSC_VER)
+#pragma warning(disable:4116)
+#endif
/*
* return true if pointer is aligned to 'alignment'
@@ -190,15 +197,44 @@ static NPY_INLINE int
npy_is_aligned(const void * p, const npy_uintp alignment)
{
/*
- * alignment is usually a power of two
- * the test is faster than a direct modulo
+ * Assumes alignment is a power of two, as required by the C standard.
+ * Assumes cast from pointer to uintp gives a sensible representation we
+ * can use bitwise & on (not required by C standard, but used by glibc).
+ * This test is faster than a direct modulo.
*/
- if (NPY_LIKELY((alignment & (alignment - 1)) == 0)) {
- return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
- }
- else {
- return ((npy_uintp)(p) % alignment) == 0;
+ return ((npy_uintp)(p) & ((alignment) - 1)) == 0;
+}
+
+/*
+ * Map an itemsize to the "uint" alignment used by the copy code, i.e. the
+ * alignment of the unsigned integer type used to copy items of that size.
+ * A return value of 0 means unaligned: there is no uint for that size.
+ */
+static NPY_INLINE int
+npy_uint_alignment(int itemsize)
+{
+    if (itemsize == 1) {
+        return 1;
+    }
+    if (itemsize == 2) {
+        return _ALIGN(npy_uint16);
+    }
+    if (itemsize == 4) {
+        return _ALIGN(npy_uint32);
+    }
+    if (itemsize == 8 || itemsize == 16) {
+        /*
+         * 16 byte types are copied using 2 uint64 assignments.
+         * See the strided copy function in lowlevel_strided_loops.c.
+         */
+        return _ALIGN(npy_uint64);
+    }
+
+    /* every other size is reported as unaligned */
+    return 0;
+}
/*
diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c
index f1b8a0209..aaaaeee82 100644
--- a/numpy/core/src/multiarray/ctors.c
+++ b/numpy/core/src/multiarray/ctors.c
@@ -2832,7 +2832,7 @@ PyArray_CopyAsFlat(PyArrayObject *dst, PyArrayObject *src, NPY_ORDER order)
* contiguous strides, etc.
*/
if (PyArray_GetDTypeTransferFunction(
- PyArray_ISALIGNED(src) && PyArray_ISALIGNED(dst),
+ IsUintAligned(src) && IsUintAligned(dst),
src_stride, dst_stride,
PyArray_DESCR(src), PyArray_DESCR(dst),
0,
diff --git a/numpy/core/src/multiarray/dtype_transfer.c b/numpy/core/src/multiarray/dtype_transfer.c
index 2cb1e0a95..97d899ce0 100644
--- a/numpy/core/src/multiarray/dtype_transfer.c
+++ b/numpy/core/src/multiarray/dtype_transfer.c
@@ -2965,6 +2965,10 @@ static void _strided_masked_wrapper_decsrcref_transfer_function(
dst += subloopsize * dst_stride;
src += subloopsize * src_stride;
N -= subloopsize;
+ if (N <= 0) {
+ break;
+ }
+
/* Process unmasked values */
mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
&subloopsize, 0);
@@ -3000,6 +3004,10 @@ static void _strided_masked_wrapper_transfer_function(
dst += subloopsize * dst_stride;
src += subloopsize * src_stride;
N -= subloopsize;
+ if (N <= 0) {
+ break;
+ }
+
/* Process unmasked values */
mask = (npy_bool*)npy_memchr((char *)mask, 0, mask_stride, N,
&subloopsize, 0);
diff --git a/numpy/core/src/multiarray/flagsobject.c b/numpy/core/src/multiarray/flagsobject.c
index a78bedccb..85ea49fb4 100644
--- a/numpy/core/src/multiarray/flagsobject.c
+++ b/numpy/core/src/multiarray/flagsobject.c
@@ -12,6 +12,7 @@
#include "npy_config.h"
#include "npy_pycompat.h"
+#include "array_assign.h"
#include "common.h"
@@ -64,7 +65,7 @@ PyArray_UpdateFlags(PyArrayObject *ret, int flagmask)
_UpdateContiguousFlags(ret);
}
if (flagmask & NPY_ARRAY_ALIGNED) {
- if (_IsAligned(ret)) {
+ if (IsAligned(ret)) {
PyArray_ENABLEFLAGS(ret, NPY_ARRAY_ALIGNED);
}
else {
diff --git a/numpy/core/src/multiarray/item_selection.c b/numpy/core/src/multiarray/item_selection.c
index 925585704..de54ca1b3 100644
--- a/numpy/core/src/multiarray/item_selection.c
+++ b/numpy/core/src/multiarray/item_selection.c
@@ -19,6 +19,7 @@
#include "arrayobject.h"
#include "ctors.h"
#include "lowlevel_strided_loops.h"
+#include "array_assign.h"
#include "item_selection.h"
#include "npy_sort.h"
@@ -809,7 +810,7 @@ _new_sortlike(PyArrayObject *op, int axis, PyArray_SortFunc *sort,
npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
npy_intp astride = PyArray_STRIDE(op, axis);
int swap = PyArray_ISBYTESWAPPED(op);
- int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+ int needcopy = !IsAligned(op) || swap || astride != elsize;
int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
PyArray_CopySwapNFunc *copyswapn = PyArray_DESCR(op)->f->copyswapn;
@@ -937,7 +938,7 @@ _new_argsortlike(PyArrayObject *op, int axis, PyArray_ArgSortFunc *argsort,
npy_intp elsize = (npy_intp)PyArray_ITEMSIZE(op);
npy_intp astride = PyArray_STRIDE(op, axis);
int swap = PyArray_ISBYTESWAPPED(op);
- int needcopy = !PyArray_ISALIGNED(op) || swap || astride != elsize;
+ int needcopy = !IsAligned(op) || swap || astride != elsize;
int hasrefs = PyDataType_REFCHK(PyArray_DESCR(op));
int needidxbuffer;
diff --git a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
index b25b4a8b6..159bb4103 100644
--- a/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
+++ b/numpy/core/src/multiarray/lowlevel_strided_loops.c.src
@@ -10,7 +10,6 @@
#define PY_SSIZE_T_CLEAN
#include "Python.h"
-#include "structmember.h"
#define NPY_NO_DEPRECATED_API NPY_API_VERSION
#define _MULTIARRAYMODULE
@@ -19,16 +18,7 @@
#include <numpy/halffloat.h>
#include "lowlevel_strided_loops.h"
-
-/* used for some alignment checks */
-#define _ALIGN(type) offsetof(struct {char c; type v;}, v)
-/*
- * Disable harmless compiler warning "4116: unnamed type definition in
- * parentheses" which is caused by the _ALIGN macro.
- */
-#if defined(_MSC_VER)
-#pragma warning(disable:4116)
-#endif
+#include "array_assign.h"
/*
@@ -1385,7 +1375,7 @@ mapiter_trivial_@name@(PyArrayObject *self, PyArrayObject *ind,
npy_intp itersize;
- int is_aligned = PyArray_ISALIGNED(self) && PyArray_ISALIGNED(result);
+ int is_aligned = IsUintAligned(self) && IsUintAligned(result);
int needs_api = PyDataType_REFCHK(PyArray_DESCR(self));
PyArray_CopySwapFunc *copyswap = PyArray_DESCR(self)->f->copyswap;
@@ -1518,7 +1508,7 @@ mapiter_@name@(PyArrayMapIterObject *mit)
* could also check extra_op is buffered, but it should rarely matter.
*/
- is_aligned = PyArray_ISALIGNED(array) && PyArray_ISALIGNED(mit->extra_op);
+ is_aligned = IsUintAligned(array) && IsUintAligned(mit->extra_op);
if (mit->size == 0) {
return 0;
diff --git a/numpy/core/src/multiarray/mapping.c b/numpy/core/src/multiarray/mapping.c
index b50866b4d..038c21c92 100644
--- a/numpy/core/src/multiarray/mapping.c
+++ b/numpy/core/src/multiarray/mapping.c
@@ -20,6 +20,7 @@
#include "lowlevel_strided_loops.h"
#include "item_selection.h"
#include "mem_overlap.h"
+#include "array_assign.h"
#define HAS_INTEGER 1
@@ -1063,7 +1064,7 @@ array_boolean_subscript(PyArrayObject *self,
/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
- if (PyArray_GetDTypeTransferFunction(PyArray_ISALIGNED(self),
+ if (PyArray_GetDTypeTransferFunction(IsUintAligned(self),
fixed_strides[0], itemsize,
dtype, dtype,
0,
@@ -1252,7 +1253,7 @@ array_assign_boolean_subscript(PyArrayObject *self,
/* Get a dtype transfer function */
NpyIter_GetInnerFixedStrideArray(iter, fixed_strides);
if (PyArray_GetDTypeTransferFunction(
- PyArray_ISALIGNED(self) && PyArray_ISALIGNED(v),
+ IsUintAligned(self) && IsUintAligned(v),
v_stride, fixed_strides[0],
PyArray_DESCR(v), PyArray_DESCR(self),
0,
@@ -1723,7 +1724,7 @@ array_subscript(PyArrayObject *self, PyObject *op)
/* Check if the type is equivalent to INTP */
PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
PyArray_DESCR(ind)->kind == 'i' &&
- PyArray_ISALIGNED(ind) &&
+ IsUintAligned(ind) &&
PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
Py_INCREF(PyArray_DESCR(self));
@@ -2086,7 +2087,7 @@ array_assign_subscript(PyArrayObject *self, PyObject *ind, PyObject *op)
/* Check if the type is equivalent to INTP */
PyArray_ITEMSIZE(ind) == sizeof(npy_intp) &&
PyArray_DESCR(ind)->kind == 'i' &&
- PyArray_ISALIGNED(ind) &&
+ IsUintAligned(ind) &&
PyDataType_ISNOTSWAPPED(PyArray_DESCR(ind))) {
/* trivial_set checks the index for us */
@@ -2606,7 +2607,7 @@ PyArray_MapIterCheckIndices(PyArrayMapIterObject *mit)
/* Check if the type is equivalent to INTP */
PyArray_ITEMSIZE(op) == sizeof(npy_intp) &&
PyArray_DESCR(op)->kind == 'i' &&
- PyArray_ISALIGNED(op) &&
+ IsUintAligned(op) &&
PyDataType_ISNOTSWAPPED(PyArray_DESCR(op))) {
char *data;
npy_intp stride;
diff --git a/numpy/core/src/multiarray/methods.c b/numpy/core/src/multiarray/methods.c
index 6317d6a16..cb63c7f74 100644
--- a/numpy/core/src/multiarray/methods.c
+++ b/numpy/core/src/multiarray/methods.c
@@ -21,6 +21,7 @@
#include "conversion_utils.h"
#include "shape.h"
#include "strfuncs.h"
+#include "array_assign.h"
#include "methods.h"
#include "alloc.h"
@@ -1785,11 +1786,11 @@ array_setstate(PyArrayObject *self, PyObject *args)
fa->data = datastr;
#ifndef NPY_PY3K
/* Check that the string is not interned */
- if (!_IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
+ if (!IsAligned(self) || swap || PyString_CHECK_INTERNED(rawdata)) {
#else
/* Bytes should always be considered immutable, but we just grab the
* pointer if they are large, to save memory. */
- if (!_IsAligned(self) || swap || (len <= 1000)) {
+ if (!IsAligned(self) || swap || (len <= 1000)) {
#endif
npy_intp num = PyArray_NBYTES(self);
fa->data = PyDataMem_NEW(num);
@@ -2281,7 +2282,7 @@ array_setflags(PyArrayObject *self, PyObject *args, PyObject *kwds)
if (PyObject_Not(align_flag)) {
PyArray_CLEARFLAGS(self, NPY_ARRAY_ALIGNED);
}
- else if (_IsAligned(self)) {
+ else if (IsAligned(self)) {
PyArray_ENABLEFLAGS(self, NPY_ARRAY_ALIGNED);
}
else {
diff --git a/numpy/core/src/multiarray/nditer_constr.c b/numpy/core/src/multiarray/nditer_constr.c
index c56376f58..dbb24f26b 100644
--- a/numpy/core/src/multiarray/nditer_constr.c
+++ b/numpy/core/src/multiarray/nditer_constr.c
@@ -17,8 +17,7 @@
#include "arrayobject.h"
#include "templ_common.h"
-#include "mem_overlap.h"
-
+#include "array_assign.h"
/* Internal helper functions private to this file */
static int
@@ -1133,7 +1132,7 @@ npyiter_prepare_one_operand(PyArrayObject **op,
/* Check if the operand is aligned */
if (op_flags & NPY_ITER_ALIGNED) {
/* Check alignment */
- if (!PyArray_ISALIGNED(*op)) {
+ if (!IsUintAligned(*op)) {
NPY_IT_DBG_PRINT("Iterator: Setting NPY_OP_ITFLAG_CAST "
"because of NPY_ITER_ALIGNED\n");
*op_itflags |= NPY_OP_ITFLAG_CAST;
@@ -2975,7 +2974,7 @@ npyiter_allocate_arrays(NpyIter *iter,
* If the operand is aligned, any buffering can use aligned
* optimizations.
*/
- if (PyArray_ISALIGNED(op[iop])) {
+ if (IsUintAligned(op[iop])) {
op_itflags[iop] |= NPY_OP_ITFLAG_ALIGNED;
}
}
diff --git a/numpy/core/tests/test_multiarray.py b/numpy/core/tests/test_multiarray.py
index 6fbdb7c6c..f22ecdb79 100644
--- a/numpy/core/tests/test_multiarray.py
+++ b/numpy/core/tests/test_multiarray.py
@@ -7331,7 +7331,6 @@ class TestFormat(object):
dst = object.__format__(a, '30')
assert_equal(res, dst)
-
class TestCTypes(object):
def test_ctypes_is_available(self):
@@ -7611,3 +7610,39 @@ def test_npymath_real():
got = fun(z)
expected = npfun(z)
assert_allclose(got, expected)
+
+def test_uintalignment_and_alignment():
+ # alignment code needs to satisfy these requirements:
+ # 1. numpy structs match C struct layout
+ # 2. ufuncs/casting is safe wrt aligned access
+ # 3. copy code is safe wrt "uint aligned" access
+ #
+ # Complex types are the main problem, whose alignment may not be the same
+ # as their "uint alignment".
+ #
+ # This test might only fail on certain platforms, where uint64 alignment is
+ # not equal to complex64 alignment. The second 2 tests will only fail
+ # for DEBUG=1.
+
+ d1 = np.dtype('u1,c8', align=True)
+ d2 = np.dtype('u4,c8', align=True)
+ d3 = np.dtype({'names': ['a', 'b'], 'formats': ['u1', d1]}, align=True)
+
+ assert_equal(np.zeros(1, dtype=d1)['f1'].flags['ALIGNED'], True)
+ assert_equal(np.zeros(1, dtype=d2)['f1'].flags['ALIGNED'], True)
+ assert_equal(np.zeros(1, dtype='u1,c8')['f1'].flags['ALIGNED'], False)
+
+ # check that C struct matches numpy struct size
+ s = _multiarray_tests.get_struct_alignments()
+ for d, (alignment, size) in zip([d1,d2,d3], s):
+ assert_equal(d.alignment, alignment)
+ assert_equal(d.itemsize, size)
+
+ # check that ufuncs don't complain in debug mode
+ # (this is probably OK if the aligned flag is true above)
+ src = np.zeros((2,2), dtype=d1)['f1'] # 4-byte aligned, often
+ np.exp(src) # assert fails?
+
+ # check that copy code doesn't complain in debug mode
+ dst = np.zeros((2,2), dtype='c8')
+ dst[:,1] = src[:,1] # assert in lowlevel_strided_loops fails?