diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2014-07-03 16:56:08 -0600 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2014-07-03 16:56:08 -0600 |
commit | e8d13740980189a255c3ca31ee33b4e390c2ed75 (patch) | |
tree | 9ebeae1fdd6ed8a77bfd40551d9af31abaa216cb /numpy | |
parent | b25cdd68ee38de1be374b396b3b72460c17cad79 (diff) | |
parent | 1d96a95208ed5b656a61f00781eeb984e2955a61 (diff) | |
download | numpy-e8d13740980189a255c3ca31ee33b4e390c2ed75.tar.gz |
Merge pull request #4812 from juliantaylor/align-bloat
Align bloat
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 2 | ||||
-rw-r--r-- | numpy/core/src/multiarray/ctors.c | 10 | ||||
-rw-r--r-- | numpy/core/src/multiarray/shape.c | 3 | ||||
-rw-r--r-- | numpy/core/src/private/npy_config.h | 11 | ||||
-rw-r--r-- | numpy/core/tests/test_numeric.py | 12 | ||||
-rw-r--r-- | numpy/f2py/tests/test_array_from_pyobj.py | 79 |
7 files changed, 78 insertions, 45 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index d2532ccf0..8a0b1826b 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -3922,7 +3922,8 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = { /* elsize */ @num@ * sizeof(@fromtype@), /* alignment */ - @num@ * _ALIGN(@fromtype@), + @num@ * _ALIGN(@fromtype@) > NPY_MAX_COPY_ALIGNMENT ? + NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@fromtype@), /* subarray */ NULL, /* fields */ @@ -4264,7 +4265,8 @@ set_typeinfo(PyObject *dict) #endif NPY_@name@, NPY_BITSOF_@name@, - @num@ * _ALIGN(@type@), + @num@ * _ALIGN(@type@) > NPY_MAX_COPY_ALIGNMENT ? + NPY_MAX_COPY_ALIGNMENT : @num@ * _ALIGN(@type@), (PyObject *) &Py@Name@ArrType_Type)); Py_DECREF(s); diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index 2b3d3c3d2..54b9e3c1a 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -676,7 +676,7 @@ _IsAligned(PyArrayObject *ap) /* alignment 1 types should have a efficient alignment for copy loops */ if (PyArray_ISFLEXIBLE(ap) || PyArray_ISSTRING(ap)) { - alignment = 16; + alignment = NPY_MAX_COPY_ALIGNMENT; } if (alignment == 1) { diff --git a/numpy/core/src/multiarray/ctors.c b/numpy/core/src/multiarray/ctors.c index d93995c8a..3da2dfae7 100644 --- a/numpy/core/src/multiarray/ctors.c +++ b/numpy/core/src/multiarray/ctors.c @@ -1054,12 +1054,12 @@ PyArray_NewFromDescr_int(PyTypeObject *subtype, PyArray_Descr *descr, int nd, fa->data = data; /* - * If the strides were provided to the function, need to - * update the flags to get the right CONTIGUOUS, ALIGN properties + * always update the flags to get the right CONTIGUOUS, ALIGN properties + * not owned data and input strides may not be aligned and on some + * platforms (debian sparc) malloc does not provide enough alignment for + * long double types */ - if (strides != NULL) { - PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL); - } + PyArray_UpdateFlags((PyArrayObject *)fa, NPY_ARRAY_UPDATE_ALL); /* * call the __array_finalize__ diff --git a/numpy/core/src/multiarray/shape.c b/numpy/core/src/multiarray/shape.c index d5fde5b97..8b73f4709 100644 --- a/numpy/core/src/multiarray/shape.c +++ b/numpy/core/src/multiarray/shape.c @@ -776,7 +776,8 @@ PyArray_Transpose(PyArrayObject *ap, PyArray_Dims *permute) PyArray_DIMS(ret)[i] = PyArray_DIMS(ap)[permutation[i]]; PyArray_STRIDES(ret)[i] = PyArray_STRIDES(ap)[permutation[i]]; } - PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS); + PyArray_UpdateFlags(ret, NPY_ARRAY_C_CONTIGUOUS | NPY_ARRAY_F_CONTIGUOUS | + NPY_ARRAY_ALIGNED); return (PyObject *)ret; } diff --git a/numpy/core/src/private/npy_config.h b/numpy/core/src/private/npy_config.h index 453dbd065..71d448ee9 100644 --- a/numpy/core/src/private/npy_config.h +++ b/numpy/core/src/private/npy_config.h @@ -10,6 +10,17 @@ #undef HAVE_HYPOT #endif +/* + * largest alignment the copy loops might require + * required as string, void and complex types might get copied using larger + * instructions than required to operate on them. E.g. complex float is copied + * in 8 byte moves but arithmetic on them only loads in 4 byte moves. + * the sparc platform may need that alignment for long doubles. + * amd64 is not harmed much by the bloat as the system provides 16 byte + * alignment by default. + */ +#define NPY_MAX_COPY_ALIGNMENT 16 + /* Safe to use ldexp and frexp for long double for MSVC builds */ #if (NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE) || defined(_MSC_VER) #ifdef HAVE_LDEXP diff --git a/numpy/core/tests/test_numeric.py b/numpy/core/tests/test_numeric.py index 40bbe5aec..c85c5cf3e 100644 --- a/numpy/core/tests/test_numeric.py +++ b/numpy/core/tests/test_numeric.py @@ -5,6 +5,7 @@ import platform from decimal import Decimal import warnings import itertools +import platform import numpy as np from numpy.core import * @@ -931,6 +932,7 @@ class TestNonzero(TestCase): assert_equal(np.nonzero(x['a']), ([0, 1, 1, 2], [2, 0, 1, 1])) assert_equal(np.nonzero(x['b']), ([0, 0, 1, 2, 2], [0, 2, 0, 1, 2])) + assert_(not x['a'].T.flags.aligned) assert_equal(np.count_nonzero(x['a'].T), 4) assert_equal(np.count_nonzero(x['b'].T), 5) assert_equal(np.nonzero(x['a'].T), ([0, 1, 1, 2], [1, 1, 2, 0])) @@ -1048,7 +1050,15 @@ class TestArrayComparisons(TestCase): def assert_array_strict_equal(x, y): assert_array_equal(x, y) # Check flags - assert_(x.flags == y.flags) + if 'sparc' not in platform.platform().lower(): + assert_(x.flags == y.flags) + else: + # sparc arrays may not be aligned for long double types + assert_(x.flags.owndata == y.flags.owndata) + assert_(x.flags.writeable == y.flags.writeable) + assert_(x.flags.c_contiguous == y.flags.c_contiguous) + assert_(x.flags.f_contiguous == y.flags.f_contiguous) + assert_(x.flags.updateifcopy == y.flags.updateifcopy) # check endianness assert_(x.dtype.isnative == y.dtype.isnative) diff --git a/numpy/f2py/tests/test_array_from_pyobj.py b/numpy/f2py/tests/test_array_from_pyobj.py index 3a148e72c..de954c374 100644 --- a/numpy/f2py/tests/test_array_from_pyobj.py +++ b/numpy/f2py/tests/test_array_from_pyobj.py @@ -4,6 +4,7 @@ import unittest import os import sys import copy +import platform import nose @@ -81,37 +82,45 @@ class Intent(object): intent = Intent() -class Type(object): - _type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT', - 'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG', - 'FLOAT', 'DOUBLE', 'LONGDOUBLE', 'CFLOAT', 'CDOUBLE', - 'CLONGDOUBLE'] - _type_cache = {} - - _cast_dict = {'BOOL':['BOOL']} - _cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE'] - _cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE'] - _cast_dict['BYTE'] = ['BYTE'] - _cast_dict['UBYTE'] = ['UBYTE'] - _cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT'] - _cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT'] - _cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT'] - _cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT'] - - _cast_dict['LONG'] = _cast_dict['INT'] + ['LONG'] - _cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG'] - - _cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG'] - _cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG'] - - _cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT'] - _cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE'] - _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE'] - - _cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT'] +_type_names = ['BOOL', 'BYTE', 'UBYTE', 'SHORT', 'USHORT', 'INT', 'UINT', + 'LONG', 'ULONG', 'LONGLONG', 'ULONGLONG', + 'FLOAT', 'DOUBLE', 'CFLOAT'] + +_cast_dict = {'BOOL':['BOOL']} +_cast_dict['BYTE'] = _cast_dict['BOOL'] + ['BYTE'] +_cast_dict['UBYTE'] = _cast_dict['BOOL'] + ['UBYTE'] +_cast_dict['BYTE'] = ['BYTE'] +_cast_dict['UBYTE'] = ['UBYTE'] +_cast_dict['SHORT'] = _cast_dict['BYTE'] + ['UBYTE', 'SHORT'] +_cast_dict['USHORT'] = _cast_dict['UBYTE'] + ['BYTE', 'USHORT'] +_cast_dict['INT'] = _cast_dict['SHORT'] + ['USHORT', 'INT'] +_cast_dict['UINT'] = _cast_dict['USHORT'] + ['SHORT', 'UINT'] + +_cast_dict['LONG'] = _cast_dict['INT'] + ['LONG'] +_cast_dict['ULONG'] = _cast_dict['UINT'] + ['ULONG'] + +_cast_dict['LONGLONG'] = _cast_dict['LONG'] + ['LONGLONG'] +_cast_dict['ULONGLONG'] = _cast_dict['ULONG'] + ['ULONGLONG'] + +_cast_dict['FLOAT'] = _cast_dict['SHORT'] + ['USHORT', 'FLOAT'] +_cast_dict['DOUBLE'] = _cast_dict['INT'] + ['UINT', 'FLOAT', 'DOUBLE'] + +_cast_dict['CFLOAT'] = _cast_dict['FLOAT'] + ['CFLOAT'] + +# (debian) sparc system malloc does not provide the alignment required by +# 16 byte long double types this means the inout intent cannot be satisfied and +# several tests fail as the alignment flag can be randomly true or fals +# when numpy gains an aligned allocator the tests could be enabled again +if 'sparc' not in platform.platform().lower(): + _type_names.extend(['LONGDOUBLE', 'CDOUBLE', 'CLONGDOUBLE']) + _cast_dict['LONGDOUBLE'] = _cast_dict['LONG'] + \ + ['ULONG', 'FLOAT', 'DOUBLE', 'LONGDOUBLE'] + _cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + \ + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE'] _cast_dict['CDOUBLE'] = _cast_dict['DOUBLE'] + ['CFLOAT', 'CDOUBLE'] - _cast_dict['CLONGDOUBLE'] = _cast_dict['LONGDOUBLE'] + ['CFLOAT', 'CDOUBLE', 'CLONGDOUBLE'] +class Type(object): + _type_cache = {} def __new__(cls, name): if isinstance(name, dtype): @@ -138,15 +147,15 @@ class Type(object): self.dtypechar = typeinfo[self.NAME][0] def cast_types(self): - return [self.__class__(_m) for _m in self._cast_dict[self.NAME]] + return [self.__class__(_m) for _m in _cast_dict[self.NAME]] def all_types(self): - return [self.__class__(_m) for _m in self._type_names] + return [self.__class__(_m) for _m in _type_names] def smaller_types(self): bits = typeinfo[self.NAME][3] types = [] - for name in self._type_names: + for name in _type_names: if typeinfo[name][3]<bits: types.append(Type(name)) return types @@ -154,7 +163,7 @@ class Type(object): def equal_types(self): bits = typeinfo[self.NAME][3] types = [] - for name in self._type_names: + for name in _type_names: if name==self.NAME: continue if typeinfo[name][3]==bits: types.append(Type(name)) @@ -163,7 +172,7 @@ class Type(object): def larger_types(self): bits = typeinfo[self.NAME][3] types = [] - for name in self._type_names: + for name in _type_names: if typeinfo[name][3]>bits: types.append(Type(name)) return types @@ -532,7 +541,7 @@ class _test_shared_memory: assert_(obj.dtype.type is self.type.dtype) # obj type is changed inplace! -for t in Type._type_names: +for t in _type_names: exec('''\ class test_%s_gen(unittest.TestCase, _test_shared_memory |