diff options
author | Sayed Adel <seiko@imavr.com> | 2020-09-15 16:06:14 +0200 |
---|---|---|
committer | Sayed Adel <seiko@imavr.com> | 2020-10-27 11:46:58 +0000 |
commit | 7d125fb70cb149207171b7181312f9679dd4d451 (patch) | |
tree | 8c6cbd3b0dd140160d96b98794f3284857681c82 /numpy | |
parent | e19f7a8cfe59226fed6cc7a7da9e881218364d49 (diff) | |
download | numpy-7d125fb70cb149207171b7181312f9679dd4d451.tar.gz |
MAINT, TST: Several improvements to _SIMD module
- use plain variables
- clean up aligned allocate
- use `PyArg_ParseTuple` for empty args
- use `Py_ssize_t` instead of `unsigned` and `size_t`
- improve coding style
- no need for custom raises assertions
- use parametrize instead of inner loops
- leave a comment about nature of mode testing unit
- shift to get max/min of int72
- add more info to repr of vector object
- get rid of exec() and use type() instead
- use `.inc` as extension for sub-headers instead of `.h`
- add `FMA4` and drop `SSE41` from _SIMD targets
Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/setup.py | 12 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd.dispatch.c.src | 34 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_arg.inc (renamed from numpy/core/src/_simd/_simd_inc_arg.h) | 39 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_convert.inc (renamed from numpy/core/src/_simd/_simd_inc_convert.h) | 73 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_data.inc.src (renamed from numpy/core/src/_simd/_simd_inc_data.h.src) | 10 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_easyintrin.inc (renamed from numpy/core/src/_simd/_simd_inc_easyintrin.h) | 133 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_inc.h.src | 107 | ||||
-rw-r--r-- | numpy/core/src/_simd/_simd_vector.inc (renamed from numpy/core/src/_simd/_simd_inc_vector.h) | 87 | ||||
-rw-r--r-- | numpy/core/tests/test_simd.py | 40 | ||||
-rw-r--r-- | numpy/core/tests/test_simd_module.py | 127 | ||||
-rw-r--r-- | numpy/distutils/command/build.py | 2 |
11 files changed, 323 insertions, 341 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py index e9a9a4e46..68aa0a851 100644 --- a/numpy/core/setup.py +++ b/numpy/core/setup.py @@ -983,18 +983,18 @@ def configuration(parent_package='',top_path=None): join('src', 'common', 'npy_cpu_features.c.src'), join('src', '_simd', '_simd.c'), join('src', '_simd', '_simd_inc.h.src'), - join('src', '_simd', '_simd_inc_data.h.src'), + join('src', '_simd', '_simd_data.inc.src'), join('src', '_simd', '_simd.dispatch.c.src'), ], depends=[ join('src', 'common', 'npy_cpu_dispatch.h'), join('src', 'common', 'simd', 'simd.h'), join('src', '_simd', '_simd.h'), join('src', '_simd', '_simd_inc.h.src'), - join('src', '_simd', '_simd_inc_data.h.src'), - join('src', '_simd', '_simd_inc_arg.h'), - join('src', '_simd', '_simd_inc_convert.h'), - join('src', '_simd', '_simd_inc_easyintrin.h'), - join('src', '_simd', '_simd_inc_vector.h'), + join('src', '_simd', '_simd_data.inc.src'), + join('src', '_simd', '_simd_arg.inc'), + join('src', '_simd', '_simd_convert.inc'), + join('src', '_simd', '_simd_easyintrin.inc'), + join('src', '_simd', '_simd_vector.inc'), ]) config.add_subpackage('tests') diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src index a776ba37b..1989be7e3 100644 --- a/numpy/core/src/_simd/_simd.dispatch.c.src +++ b/numpy/core/src/_simd/_simd.dispatch.c.src @@ -3,11 +3,11 @@ #include "_simd_inc.h" #if NPY_SIMD -#include "_simd_inc_data.h" -#include "_simd_inc_convert.h" -#include "_simd_inc_vector.h" -#include "_simd_inc_arg.h" -#include "_simd_inc_easyintrin.h" +#include "_simd_data.inc" +#include "_simd_convert.inc" +#include "_simd_vector.inc" +#include "_simd_arg.inc" +#include "_simd_easyintrin.inc" /************************************************************************* * Defining NPYV intrinsics as module functions @@ -39,26 +39,24 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, q@sfx@) static PyObject * simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), 
PyObject *args) { - simd_arg req_args[] = { - {.dtype = simd_data_q@sfx@}, - {.dtype = simd_data_v@sfx@}, - }; + simd_arg seq_arg = {.dtype = simd_data_q@sfx@}; + simd_arg vec_arg = {.dtype = simd_data_v@sfx@}; if (!PyArg_ParseTuple( args, "O&O&:@intrin@_@sfx@", - simd_arg_converter, &req_args[0], - simd_arg_converter, &req_args[1] + simd_arg_converter, &seq_arg, + simd_arg_converter, &vec_arg )) { return NULL; } npyv_@intrin@_@sfx@( - req_args[0].data.q@sfx@, req_args[1].data.v@sfx@ + seq_arg.data.q@sfx@, vec_arg.data.v@sfx@ ); // write-back - if (simd_sequence_fill_obj(req_args[0].obj, req_args[0].data.q@sfx@, simd_data_q@sfx@)) { - simd_args_sequence_free(req_args, 2); + if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.q@sfx@, simd_data_q@sfx@)) { + simd_arg_free(&seq_arg); return NULL; } - simd_args_sequence_free(req_args, 2); + simd_arg_free(&seq_arg); Py_RETURN_NONE; } /**end repeat1**/ @@ -89,7 +87,7 @@ SIMD_IMPL_INTRIN_1(reinterpret_@sfx_to@_@sfx@, v@sfx_to@, v@sfx@) static PyObject * simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args) { - npyv_lanetype_@sfx@ *data = simd_sequence_from_obj(args, simd_data_q@sfx@, npyv_nlanes_@sfx@); + npyv_lanetype_@sfx@ *data = simd_sequence_from_iterable(args, simd_data_q@sfx@, npyv_nlanes_@sfx@); if (data == NULL) { return NULL; } @@ -105,7 +103,7 @@ simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args) data[64] // for setf )}; simd_sequence_free(data); - return (PyObject*)simd_vector_to_obj(r, simd_data_v@sfx@); + return (PyObject*)PySIMDVector_FromData(r, simd_data_v@sfx@); } /**end repeat1**/ @@ -354,7 +352,7 @@ NPY_CPU_DISPATCH_CURFX(simd_create_module)(void) goto err; } #if NPY_SIMD - if (simd_vector_register(m)) { + if (PySIMDVectorType_Init(m)) { goto err; } /**begin repeat diff --git a/numpy/core/src/_simd/_simd_inc_arg.h b/numpy/core/src/_simd/_simd_arg.inc index eabf49c5f..f5bcf5487 100644 --- a/numpy/core/src/_simd/_simd_inc_arg.h +++ 
b/numpy/core/src/_simd/_simd_arg.inc @@ -1,7 +1,9 @@ -#if !NPY_SIMD - #error "Not a standalone header, only works through 'simd.dispatch.c.src'" -#endif - +/** + * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and + * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN` + * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was + * deemed too harmful to readability. + */ /************************************ ** Protected Definitions ************************************/ @@ -11,17 +13,17 @@ simd_arg_from_obj(PyObject *obj, simd_arg *arg) assert(arg->dtype != 0); const simd_data_info *info = simd_data_getinfo(arg->dtype); if (info->is_scalar) { - arg->data = simd_scalar_from_obj(obj, arg->dtype); + arg->data = simd_scalar_from_number(obj, arg->dtype); } else if (info->is_sequence) { unsigned min_seq_size = simd_data_getinfo(info->to_vector)->nlanes; - arg->data.qu8 = simd_sequence_from_obj(obj, arg->dtype, min_seq_size); + arg->data.qu8 = simd_sequence_from_iterable(obj, arg->dtype, min_seq_size); } else if (info->is_vectorx) { - arg->data = simd_vectorx_from_obj(obj, arg->dtype); + arg->data = simd_vectorx_from_tuple(obj, arg->dtype); } else if (info->is_vector) { - arg->data = simd_vector_from_obj((simd_vector*)obj, arg->dtype); + arg->data = PySIMDVector_AsData((PySIMDVectorObject*)obj, arg->dtype); } else { arg->data.u64 = 0; PyErr_Format(PyExc_RuntimeError, @@ -41,16 +43,16 @@ simd_arg_to_obj(const simd_arg *arg) assert(arg->dtype != 0); const simd_data_info *info = simd_data_getinfo(arg->dtype); if (info->is_scalar) { - return simd_scalar_to_obj(arg->data, arg->dtype); + return simd_scalar_to_number(arg->data, arg->dtype); } if (info->is_sequence) { - return simd_sequence_to_obj(arg->data.qu8, arg->dtype); + return simd_sequence_to_list(arg->data.qu8, arg->dtype); } if (info->is_vectorx) { - return 
simd_vectorx_to_obj(arg->data, arg->dtype); + return simd_vectorx_to_tuple(arg->data, arg->dtype); } if (info->is_vector) { - return (PyObject*)simd_vector_to_obj(arg->data, arg->dtype); + return (PyObject*)PySIMDVector_FromData(arg->data, arg->dtype); } PyErr_Format(PyExc_RuntimeError, "unhandled arg to object type id:%d, name:%s", arg->dtype, info->pyname @@ -59,15 +61,10 @@ simd_arg_to_obj(const simd_arg *arg) } static void -simd_args_sequence_free(simd_arg *args, int args_len) +simd_arg_free(simd_arg *arg) { - assert(args_len > 0); - while (--args_len >= 0) { - simd_arg *arg = &args[args_len]; - const simd_data_info *info = simd_data_getinfo(arg->dtype); - if (!info->is_sequence) { - continue; - } + const simd_data_info *info = simd_data_getinfo(arg->dtype); + if (info->is_sequence) { simd_sequence_free(arg->data.qu8); } } @@ -82,7 +79,7 @@ simd_arg_converter(PyObject *obj, simd_arg *arg) arg->obj = obj; return Py_CLEANUP_SUPPORTED; } else { - simd_args_sequence_free(arg, 1); + simd_arg_free(arg); } return 1; } diff --git a/numpy/core/src/_simd/_simd_inc_convert.h b/numpy/core/src/_simd/_simd_convert.inc index 360101247..f5bfc3f50 100644 --- a/numpy/core/src/_simd/_simd_inc_convert.h +++ b/numpy/core/src/_simd/_simd_convert.inc @@ -1,12 +1,14 @@ -#if !NPY_SIMD - #error "Not a standalone header, only works through 'simd.dispatch.c.src'" -#endif - +/** + * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and + * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN` + * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was + * deemed too harmful to readability. 
+ */ /************************************ ** Protected Definitions ************************************/ static simd_data -simd_scalar_from_obj(PyObject *obj, simd_data_type dtype) +simd_scalar_from_number(PyObject *obj, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); assert(info->is_scalar && info->lane_size > 0); @@ -23,7 +25,7 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype) } static PyObject * -simd_scalar_to_obj(simd_data data, simd_data_type dtype) +simd_scalar_to_number(simd_data data, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); assert(info->is_scalar && info->lane_size > 0); @@ -41,43 +43,44 @@ simd_scalar_to_obj(simd_data data, simd_data_type dtype) return PyLong_FromUnsignedLongLong(data.u64 >> leftb); } +typedef struct { + Py_ssize_t len; + void *ptr; +} simd__alloc_data; + static void * simd_sequence_new(Py_ssize_t len, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); - assert(info->is_sequence && info->lane_size > 0); - - size_t size = NPY_SIMD_WIDTH + sizeof(size_t) + sizeof(size_t*); - size += len * info->lane_size; - - size_t *ptr = malloc(size); + assert(len > 0 && info->is_sequence && info->lane_size > 0); + size_t size = sizeof(simd__alloc_data) + len * info->lane_size + NPY_SIMD_WIDTH; + void *ptr = malloc(size); if (ptr == NULL) { return PyErr_NoMemory(); } - *(ptr++) = len; - size_t **a_ptr = (size_t**)( - ((size_t)ptr + NPY_SIMD_WIDTH) & ~(size_t)(NPY_SIMD_WIDTH-1) + // align the pointer + simd__alloc_data *a_ptr = (simd__alloc_data *)( + ((uintptr_t)ptr + sizeof(simd__alloc_data) + NPY_SIMD_WIDTH) & ~(uintptr_t)(NPY_SIMD_WIDTH-1) ); - a_ptr[-1] = ptr; + a_ptr[-1].len = len; + a_ptr[-1].ptr = ptr; return a_ptr; } -static size_t -simd_sequence_len(const void *ptr) +static Py_ssize_t +simd_sequence_len(void const *ptr) { - size_t *ptrz = ((size_t**)ptr)[-1]; - return *(ptrz-1); + return ((simd__alloc_data const*)ptr)[-1].len; } static 
void simd_sequence_free(void *ptr) { - size_t *ptrz = ((size_t**)ptr)[-1]; - free(ptrz-1); + free(((simd__alloc_data *)ptr)[-1].ptr); } static void * -simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size) +simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size) { const simd_data_info *info = simd_data_getinfo(dtype); assert(info->is_sequence && info->lane_size > 0); @@ -86,7 +89,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size) return NULL; } Py_ssize_t seq_size = PySequence_Fast_GET_SIZE(seq_obj); - if (seq_size < (Py_ssize_t)min_size) { + if (seq_size < min_size) { PyErr_Format(PyExc_ValueError, "minimum acceptable size of the required sequence is %d, given(%d)", min_size, seq_size @@ -99,7 +102,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size) } PyObject **seq_items = PySequence_Fast_ITEMS(seq_obj); for (Py_ssize_t i = 0; i < seq_size; ++i) { - simd_data data = simd_scalar_from_obj(seq_items[i], info->to_scalar); + simd_data data = simd_scalar_from_number(seq_items[i], info->to_scalar); npyv_lanetype_u8 *sdst = dst + i * info->lane_size; memcpy(sdst, &data.u64, info->lane_size); } @@ -113,7 +116,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size) } static int -simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype) +simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); if (!PySequence_Check(obj)) { @@ -123,12 +126,12 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype) return -1; } const npyv_lanetype_u8 *src = ptr; - Py_ssize_t seq_len = (Py_ssize_t)simd_sequence_len(ptr); + Py_ssize_t seq_len = simd_sequence_len(ptr); for (Py_ssize_t i = 0; i < seq_len; ++i) { const npyv_lanetype_u8 *ssrc = src + i * info->lane_size; simd_data data; memcpy(&data.u64, ssrc, info->lane_size); - 
PyObject *item = simd_scalar_to_obj(data, info->to_scalar); + PyObject *item = simd_scalar_to_number(data, info->to_scalar); if (item == NULL) { return -1; } @@ -141,13 +144,13 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype) } static PyObject * -simd_sequence_to_obj(const void *ptr, simd_data_type dtype) +simd_sequence_to_list(const void *ptr, simd_data_type dtype) { - PyObject *list = PyList_New((Py_ssize_t)simd_sequence_len(ptr)); + PyObject *list = PyList_New(simd_sequence_len(ptr)); if (list == NULL) { return NULL; } - if (simd_sequence_fill_obj(list, ptr, dtype) < 0) { + if (simd_sequence_fill_iterable(list, ptr, dtype) < 0) { Py_DECREF(list); return NULL; } @@ -155,7 +158,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype) } static simd_data -simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype) +simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); // NPYV currently only supports x2 and x3 @@ -172,7 +175,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype) for (int i = 0; i < info->is_vectorx; ++i) { PyObject *item = PyTuple_GET_ITEM(obj, i); // get the max multi-vec and let the compiler do the rest - data.vu64x3.val[i] = simd_vector_from_obj((simd_vector*)item, info->to_vector).vu64; + data.vu64x3.val[i] = PySIMDVector_AsData((PySIMDVectorObject*)item, info->to_vector).vu64; if (PyErr_Occurred()) { return data; } @@ -181,7 +184,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype) } static PyObject * -simd_vectorx_to_obj(simd_data data, simd_data_type dtype) +simd_vectorx_to_tuple(simd_data data, simd_data_type dtype) { const simd_data_info *info = simd_data_getinfo(dtype); // NPYV currently only supports x2 and x3 @@ -194,7 +197,7 @@ simd_vectorx_to_obj(simd_data data, simd_data_type dtype) for (int i = 0; i < info->is_vectorx; ++i) { // get the max multi-vector and let the compiler handle the rest simd_data vdata = {.vu64 
= data.vu64x3.val[i]}; - PyObject *item = (PyObject*)simd_vector_to_obj(vdata, info->to_vector); + PyObject *item = (PyObject*)PySIMDVector_FromData(vdata, info->to_vector); if (item == NULL) { // TODO: improve log add item number Py_DECREF(tuple); diff --git a/numpy/core/src/_simd/_simd_inc_data.h.src b/numpy/core/src/_simd/_simd_data.inc.src index eefac483b..5c796487c 100644 --- a/numpy/core/src/_simd/_simd_inc_data.h.src +++ b/numpy/core/src/_simd/_simd_data.inc.src @@ -1,7 +1,9 @@ -#if !NPY_SIMD - #error "Not a standalone header, only works through 'simd.dispatch.c.src'" -#endif - +/** + * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and + * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN` + * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was + * deemed too harmful to readability. + */ /************************************ ** Private Definitions ************************************/ diff --git a/numpy/core/src/_simd/_simd_inc_easyintrin.h b/numpy/core/src/_simd/_simd_easyintrin.inc index 9ff227999..54e7ccf01 100644 --- a/numpy/core/src/_simd/_simd_inc_easyintrin.h +++ b/numpy/core/src/_simd/_simd_easyintrin.inc @@ -1,33 +1,19 @@ -#if !NPY_SIMD - #error "Not a standalone header, only works through 'simd.dispatch.c.src'" -#endif - +/** + * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and + * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN` + * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was + * deemed too harmful to readability. 
+ */ #define SIMD_INTRIN_DEF(NAME) \ { NPY_TOSTRING(NAME), simd__intrin_##NAME, METH_VARARGS, NULL } , // comma -static int simd__no_arguments(PyObject *args, const char* method_name) -{ - if (args == NULL) { - return 0; - } - assert(PyTuple_Check(args)); - Py_ssize_t obj_arg_len = PyTuple_GET_SIZE(args); - if (obj_arg_len != 0) { - PyErr_Format(PyExc_RuntimeError, - "%s(), takes no arguments, given(%d)", method_name, obj_arg_len - ); - return -1; - } - return 0; -} - #define SIMD_IMPL_INTRIN_0(NAME, RET) \ static PyObject *simd__intrin_##NAME \ (PyObject* NPY_UNUSED(self), PyObject *args) \ { \ - if (simd__no_arguments( \ - args, NPY_TOSTRING(NAME) \ - )) return NULL; \ + if (!PyArg_ParseTuple( \ + args, ":" NPY_TOSTRING(NAME)) \ + ) return NULL; \ simd_arg a = { \ .dtype = simd_data_##RET, \ .data = {.RET = npyv_##NAME()}, \ @@ -39,9 +25,9 @@ static int simd__no_arguments(PyObject *args, const char* method_name) static PyObject *simd__intrin_##NAME \ (PyObject* NPY_UNUSED(self), PyObject *args) \ { \ - if (simd__no_arguments( \ - args, NPY_TOSTRING(NAME) \ - )) return NULL; \ + if (!PyArg_ParseTuple( \ + args, ":" NPY_TOSTRING(NAME)) \ + ) return NULL; \ npyv_##NAME(); \ Py_RETURN_NONE; \ } @@ -55,88 +41,87 @@ static int simd__no_arguments(PyObject *args, const char* method_name) args, "O&:"NPY_TOSTRING(NAME), \ simd_arg_converter, &arg \ )) return NULL; \ - simd_data r = {.RET = npyv_##NAME( \ + simd_data data = {.RET = npyv_##NAME( \ arg.data.IN0 \ )}; \ - simd_args_sequence_free(&arg, 1); \ - arg.data = r; \ - arg.dtype = simd_data_##RET; \ - return simd_arg_to_obj(&arg); \ + simd_arg_free(&arg); \ + simd_arg ret = { \ + .data = data, .dtype = simd_data_##RET \ + }; \ + return simd_arg_to_obj(&ret); \ } #define SIMD_IMPL_INTRIN_2(NAME, RET, IN0, IN1) \ static PyObject *simd__intrin_##NAME \ (PyObject* NPY_UNUSED(self), PyObject *args) \ { \ - simd_arg req_args[] = { \ - {.dtype = simd_data_##IN0}, \ - {.dtype = simd_data_##IN1}, \ - }; \ + simd_arg arg1 = 
{.dtype = simd_data_##IN0}; \ + simd_arg arg2 = {.dtype = simd_data_##IN1}; \ if (!PyArg_ParseTuple( \ args, "O&O&:"NPY_TOSTRING(NAME), \ - simd_arg_converter, &req_args[0], \ - simd_arg_converter, &req_args[1] \ + simd_arg_converter, &arg1, \ + simd_arg_converter, &arg2 \ )) return NULL; \ - simd_data r = {.RET = npyv_##NAME( \ - req_args[0].data.IN0, \ - req_args[1].data.IN1 \ + simd_data data = {.RET = npyv_##NAME( \ + arg1.data.IN0, arg2.data.IN1 \ )}; \ - simd_args_sequence_free(req_args, 2); \ - req_args[0].data = r; \ - req_args[0].dtype = simd_data_##RET; \ - return simd_arg_to_obj(req_args); \ + simd_arg_free(&arg1); \ + simd_arg_free(&arg2); \ + simd_arg ret = { \ + .data = data, .dtype = simd_data_##RET \ + }; \ + return simd_arg_to_obj(&ret); \ } #define SIMD__REPEAT_2IMM(C, NAME, IN0) \ - C == req_args[1].data.u8 ? NPY_CAT(npyv_, NAME)(req_args[0].data.IN0, C) : + C == arg2.data.u8 ? NPY_CAT(npyv_, NAME)(arg1.data.IN0, C) : #define SIMD_IMPL_INTRIN_2IMM(NAME, RET, IN0, CONST_RNG) \ static PyObject *simd__intrin_##NAME \ (PyObject* NPY_UNUSED(self), PyObject *args) \ { \ - simd_arg req_args[] = { \ - {.dtype = simd_data_##IN0}, \ - {.dtype = simd_data_u8}, \ - }; \ + simd_arg arg1 = {.dtype = simd_data_##IN0}; \ + simd_arg arg2 = {.dtype = simd_data_u8}; \ if (!PyArg_ParseTuple( \ args, "O&O&:"NPY_TOSTRING(NAME), \ - simd_arg_converter, &req_args[0], \ - simd_arg_converter, &req_args[1] \ + simd_arg_converter, &arg1, \ + simd_arg_converter, &arg2 \ )) return NULL; \ - simd_data r; \ - r.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)( \ + simd_data data; \ + data.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)( \ SIMD__REPEAT_2IMM, NAME, IN0 \ - ) npyv_##NAME(req_args[0].data.IN0, 0); \ - simd_args_sequence_free(req_args, 2); \ - req_args[0].data = r; \ - req_args[0].dtype = simd_data_##RET; \ - return simd_arg_to_obj(req_args); \ + ) npyv_##NAME(arg1.data.IN0, 0); \ + simd_arg_free(&arg1); \ + simd_arg ret = { \ + .data = data, .dtype = simd_data_##RET \ + }; 
\ + return simd_arg_to_obj(&ret); \ } #define SIMD_IMPL_INTRIN_3(NAME, RET, IN0, IN1, IN2) \ static PyObject *simd__intrin_##NAME \ (PyObject* NPY_UNUSED(self), PyObject *args) \ { \ - simd_arg req_args[] = { \ - {.dtype = simd_data_##IN0}, \ - {.dtype = simd_data_##IN1}, \ - {.dtype = simd_data_##IN2}, \ - }; \ + simd_arg arg1 = {.dtype = simd_data_##IN0}; \ + simd_arg arg2 = {.dtype = simd_data_##IN1}; \ + simd_arg arg3 = {.dtype = simd_data_##IN2}; \ if (!PyArg_ParseTuple( \ args, "O&O&O&:"NPY_TOSTRING(NAME), \ - simd_arg_converter, &req_args[0], \ - simd_arg_converter, &req_args[1], \ - simd_arg_converter, &req_args[2] \ + simd_arg_converter, &arg1, \ + simd_arg_converter, &arg2, \ + simd_arg_converter, &arg3 \ )) return NULL; \ - simd_data r = {.RET = npyv_##NAME( \ - req_args[0].data.IN0, \ - req_args[1].data.IN1, \ - req_args[2].data.IN2 \ + simd_data data = {.RET = npyv_##NAME( \ + arg1.data.IN0, arg2.data.IN1, \ + arg3.data.IN2 \ )}; \ - simd_args_sequence_free(req_args, 3); \ - req_args[0].data = r; \ - req_args[0].dtype = simd_data_##RET; \ - return simd_arg_to_obj(req_args); \ + simd_arg_free(&arg1); \ + simd_arg_free(&arg2); \ + simd_arg_free(&arg3); \ + simd_arg ret = { \ + .data = data, .dtype = simd_data_##RET \ + }; \ + return simd_arg_to_obj(&ret); \ } /** * Helper macros for repeating and expand a certain macro. 
diff --git a/numpy/core/src/_simd/_simd_inc.h.src b/numpy/core/src/_simd/_simd_inc.h.src index 530c7a736..9858fc0dc 100644 --- a/numpy/core/src/_simd/_simd_inc.h.src +++ b/numpy/core/src/_simd/_simd_inc.h.src @@ -1,6 +1,6 @@ -/** - * This header works only through '_simd.dispatch.c' - */ +#ifndef _SIMD_SIMD_INC_H_ +#define _SIMD_SIMD_INC_H_ + #include <Python.h> #include "simd/simd.h" @@ -147,29 +147,47 @@ typedef struct { PyObject_HEAD // vector type id - simd_data_type type; + simd_data_type dtype; // vector data, aligned for safe casting npyv_lanetype_u8 NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) data[NPY_SIMD_WIDTH]; -} simd_vector; +} PySIMDVectorObject; /** - * convert simd_data to PyObject(simd_vector), - * raise Python exception on failure and returns NULL. + * Create a Python obj(PySIMDVectorObject) from a NPYV vector based on the contents + * of `data`(simd_data) and according to the vector data type `dtype` + * on range(simd_data_[vu8:vf64]). + * Return NULL and a Python exception on failure, otherwise new reference. + * + * Example: + ** simd_data data = {.vu8 = npyv_setall_u8(0xff)}; + ** PySIMDVectorObject *obj = PySIMDVector_FromData(data, simd_data_vu8); + ** if (obj != NULL) { + ** printf("I have a valid vector obj and first element is \n", obj->data[0]); + ** Py_DECREF(obj); + ** } */ -static simd_vector * -simd_vector_to_obj(simd_data data, simd_data_type vtype); +static PySIMDVectorObject * +PySIMDVector_FromData(simd_data data, simd_data_type dtype); /** - * convert PyObject(simd_vector) to simd_data, - * raise Python exception on failure. + * Return a NPYV vector(simd_data) representation of `obj`(PySIMDVectorObject) and + * according to the vector data type `dtype` on range (simd_data_[vu8:vf64]). + * Raise a Python exception on failure. + * + * Example: + ** simd_data data = PySIMDVector_AsData(vec_obj, simd_data_vf32); + ** if (!PyErr_Occurred()) { + ** npyv_f32 add_1 = npyv_add_f32(data.vf32, npyv_setall_f32(1)); + ** ... 
+ ** } */ static simd_data -simd_vector_from_obj(simd_vector *vec, simd_data_type vtype); +PySIMDVector_AsData(PySIMDVectorObject *obj, simd_data_type dtype); /** - * initialize and register vector type(PyTypeObject) to PyModule, - * vector type can be reached through attribute 'vector_type'. + * initialize and register PySIMDVectorType to certain PyModule, + * PySIMDVectorType can be reached through attribute 'vector_type'. * return -1 on error, 0 on success. */ static int -simd_vector_register(PyObject *module); +PySIMDVectorType_Init(PyObject *module); /************************************ ** Declarations (inc_convert) @@ -180,13 +198,13 @@ simd_vector_register(PyObject *module); * Raise a Python exception on failure. * * Example: - ** simd_data data = simd_scalar_from_obj(obj, simd_data_f32); + ** simd_data data = simd_scalar_from_number(obj, simd_data_f32); ** if (!PyErr_Occurred()) { ** printf("I have a valid float %d\n", data.f32); ** } */ static simd_data -simd_scalar_from_obj(PyObject *obj, simd_data_type dtype); +simd_scalar_from_number(PyObject *obj, simd_data_type dtype); /** * Create a Python scalar from a C scalar based on the contents * of `data`(simd_data) and according to the scalar data type `dtype` @@ -195,14 +213,14 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype); * * Example: ** simd_data data = {.u32 = 0x7fffffff}; - ** PyObject *obj = simd_scalar_to_obj(data, simd_data_s32); + ** PyObject *obj = simd_scalar_to_number(data, simd_data_s32); ** if (obj != NULL) { ** printf("I have a valid Python integer %d\n", PyLong_AsLong(obj)); ** Py_DECREF(obj); ** } */ static PyObject * -simd_scalar_to_obj(simd_data data, simd_data_type dtype); +simd_scalar_to_number(simd_data data, simd_data_type dtype); /** * Allocate a C array in memory according to number of elements `len` * and sequence data type `dtype` on range(simd_data_[qu8:qf64]). 
@@ -223,13 +241,13 @@ static void * simd_sequence_new(Py_ssize_t len, simd_data_type dtype); /** * Return the number of elements of the allocated C array `ptr` - * by `simd_sequence_new()` or `simd_sequence_from_obj()`. + * by `simd_sequence_new()` or `simd_sequence_from_iterable()`. */ -static size_t +static Py_ssize_t simd_sequence_len(const void *ptr); /** * Free the allocated C array by `simd_sequence_new()` or - * `simd_sequence_from_obj()`. + * `simd_sequence_from_iterable()`. */ static void simd_sequence_free(void *ptr); @@ -243,7 +261,7 @@ simd_sequence_free(void *ptr); * with a Python exception on failure. * * Example: - ** npyv_lanetype_u32 *ptr = simd_sequence_from_obj(seq_obj, simd_data_qu32, npyv_nlanes_u32); + ** npyv_lanetype_u32 *ptr = simd_sequence_from_iterable(seq_obj, simd_data_qu32, npyv_nlanes_u32); ** if (ptr != NULL) { ** npyv_u32 a = npyv_load_u32(ptr); ** ... @@ -252,25 +270,25 @@ simd_sequence_free(void *ptr); ** */ static void * -simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size); +simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size); /** * Fill a Python sequence object `obj` with a C array `ptr` allocated by - * `simd_sequence_new()` or `simd_sequence_from_obj()` according to + * `simd_sequence_new()` or `simd_sequence_from_iterable()` according to * to the sequence data type `dtype` on range (simd_data_[qu8:qf64]). * * Return 0 on success or -1 with a Python exception on failure. */ static int -simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype); +simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype); /** * Create a Python list from a C array `ptr` allocated by - * `simd_sequence_new()` or `simd_sequence_from_obj()` according to + * `simd_sequence_new()` or `simd_sequence_from_iterable()` according to * to the sequence data type `dtype` on range (simd_data_[qu8:qf64]). 
* * Return NULL and a Python exception on failure, otherwise new reference. */ static PyObject * -simd_sequence_to_obj(const void *ptr, simd_data_type dtype); +simd_sequence_to_list(const void *ptr, simd_data_type dtype); /** * Return a SIMD multi-vector(simd_data) representation of Python tuple of * (simd_vector*,) `obj` according to the scalar data type `dtype` @@ -279,7 +297,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype); * Raise a Python exception on failure. * * Example: - ** simd_data data = simd_vectorx_from_obj(tuple_obj, simd_data_vf32x2); + ** simd_data data = simd_vectorx_from_tuple(tuple_obj, simd_data_vf32x2); ** if (!PyErr_Occurred()) { ** npyv_f32 sum = npyv_add_f32(data.vf32x2.val[0], data.vf32x2.val[1]); ** ... @@ -287,7 +305,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype); ** */ static simd_data -simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype); +simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype); /** * Create a Python tuple of 'simd_vector' from a SIMD multi-vector * based on the contents of `data`(simd_data) and according to @@ -297,7 +315,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype); * Return NULL and a Python exception on failure, otherwise new reference. */ static PyObject * -simd_vectorx_to_obj(simd_data data, simd_data_type dtype); +simd_vectorx_to_tuple(simd_data data, simd_data_type dtype); /************************************ ** Declarations (inc_arg) @@ -320,7 +338,7 @@ typedef struct * Return -1 and raise Python exception on failure, otherwise return 0. * * Notes: - * - requires `simd_args_sequence_free()` or `simd_sequence_free()` + * - requires `simd_arg_free()` or `simd_sequence_free()` * to free allocated C array, in case of sequence data types. * - the number of minimum acceptable elements for sequence data * types is the number of lanes of the equivalent vector data type. 
@@ -333,7 +351,7 @@ typedef struct ** } ** npyv_u8 v_u8 = npyv_load_u8(arg->data.qu8); ** ... - ** simd_args_sequence_free(&arg, 1); + ** simd_arg_free(&arg); * * Example #2: ** simd_arg arg = {.dtype = simd_data_vf32}; @@ -369,36 +387,35 @@ simd_arg_to_obj(const simd_arg *arg); * used with PyArg_Parse*(). * * Notes: - * - requires `simd_args_sequence_free()` or `simd_sequence_free()` + * - requires `simd_arg_free()` or `simd_sequence_free()` * to free allocated C array, in case of sequence data types. * - the number of minimum acceptable elements for sequence data * types is the number of lanes of the equivalent vector data type. * - use 'arg->obj' to retrieve the parameter obj. * * Example: - ** simd_arg req_args[] = { - ** {.dtype = simd_data_qf32}, - ** {.dtype = simd_data_vf32}, - ** }; + ** simd_arg seq_f32 = {.dtype = simd_data_qf32}; + ** simd_arg vec_f32 = {.dtype = simd_data_vf32}; ** if (!PyArg_ParseTuple( ** args, "O&O&:add_sum_f32", - ** simd_arg_converter, &req_args[0], - ** simd_arg_converter, &req_args[1] + ** simd_arg_converter, &seq_f32, + ** simd_arg_converter, &vec_f32 ** )) { ** // fail ** return; ** } - ** npyv_f32 load_a = npyv_load_f32(req_args[0].data.qf32); - ** npyv_f32 sum = npyv_add_f32(load_a, req_args[1].data.vf32); + ** npyv_f32 load_a = npyv_load_f32(seq_f32.data.qf32); + ** npyv_f32 sum = npyv_add_f32(load_a, vec_f32.data.vf32); ** ... - ** simd_args_sequence_free(req_args, 2); + ** simd_arg_free(&seq_f32); */ static int simd_arg_converter(PyObject *obj, simd_arg *arg); /** - * Free the allocated C array for sequence data types. + * Free the allocated C array, if the arg hold sequence data type. 
*/ static void -simd_args_sequence_free(simd_arg *args, int args_len); +simd_arg_free(simd_arg *arg); #endif // NPY_SIMD +#endif // _SIMD_SIMD_INC_H_ diff --git a/numpy/core/src/_simd/_simd_inc_vector.h b/numpy/core/src/_simd/_simd_vector.inc index b0fa17b9a..2a1378f22 100644 --- a/numpy/core/src/_simd/_simd_inc_vector.h +++ b/numpy/core/src/_simd/_simd_vector.inc @@ -1,63 +1,54 @@ -#if !NPY_SIMD - #error "Not a standalone header, only works through 'simd.dispatch.c.src'" -#endif - +/** + * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and + * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN` + * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was + * deemed too harmful to readability. + */ /************************************ ** Private Definitions ************************************/ -// PySequenceMethods static Py_ssize_t -simd__vector_length(simd_vector *self) +simd__vector_length(PySIMDVectorObject *self) { - return simd_data_getinfo(self->type)->nlanes; + return simd_data_getinfo(self->dtype)->nlanes; } static PyObject * -simd__vector_item(simd_vector *self, Py_ssize_t i) +simd__vector_item(PySIMDVectorObject *self, Py_ssize_t i) { - const simd_data_info *info = simd_data_getinfo(self->type); + const simd_data_info *info = simd_data_getinfo(self->dtype); int nlanes = info->nlanes; if (i >= nlanes) { - PyErr_SetString(PyExc_IndexError, "list index out of range"); + PyErr_SetString(PyExc_IndexError, "vector index out of range"); return NULL; } npyv_lanetype_u8 *src = self->data + i * info->lane_size; simd_data data; memcpy(&data.u64, src, info->lane_size); - return simd_scalar_to_obj(data, info->to_scalar); + return simd_scalar_to_number(data, info->to_scalar); } static PySequenceMethods simd__vector_as_sequence = { - (lenfunc) simd__vector_length, /* sq_length */ - (binaryfunc) NULL, /* sq_concat */ 
- (ssizeargfunc) NULL, /* sq_repeat */ - (ssizeargfunc) simd__vector_item, /* sq_item */ - (ssizessizeargfunc) NULL, /* sq_slice */ - (ssizeobjargproc) NULL, /* sq_ass_item */ - (ssizessizeobjargproc) NULL, /* sq_ass_slice */ - (objobjproc) NULL, /* sq_contains */ - (binaryfunc) NULL, /* sq_inplace_concat */ - (ssizeargfunc) NULL, /* sq_inplace_repeat */ + .sq_length = (lenfunc) simd__vector_length, + .sq_item = (ssizeargfunc) simd__vector_item }; -// PyGetSetDef static PyObject * -simd__vector_name(simd_vector *self) +simd__vector_name(PySIMDVectorObject *self) { - return PyUnicode_FromString(simd_data_getinfo(self->type)->pyname); + return PyUnicode_FromString(simd_data_getinfo(self->dtype)->pyname); } static PyGetSetDef simd__vector_getset[] = { { "__name__", (getter)simd__vector_name, NULL, NULL, NULL }, { NULL, NULL, NULL, NULL, NULL } }; -// PyTypeObject(simd__vector_type) static PyObject * -simd__vector_repr(PyObject *self) +simd__vector_repr(PySIMDVectorObject *self) { - // PySequence_Fast returns Tuple in PyPy - PyObject *obj = PySequence_List(self); + PyObject *obj = PySequence_List((PyObject*)self); if (obj != NULL) { - PyObject *repr = PyObject_Str(obj); + const char *type_name = simd_data_getinfo(self->dtype)->pyname; + PyObject *repr = PyUnicode_FromFormat("<%s of %R>", type_name, obj); Py_DECREF(obj); return repr; } @@ -81,10 +72,10 @@ simd__vector_compare(PyObject *self, PyObject *other, int cmp_op) } return obj; } -static PyTypeObject simd__vector_type = { +static PyTypeObject PySIMDVectorType = { PyVarObject_HEAD_INIT(NULL, 0) .tp_name = NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(VECTOR)), - .tp_basicsize = sizeof(simd_vector), + .tp_basicsize = sizeof(PySIMDVectorObject), .tp_repr = (reprfunc)simd__vector_repr, .tp_as_sequence = &simd__vector_as_sequence, .tp_flags = Py_TPFLAGS_DEFAULT, @@ -95,21 +86,21 @@ static PyTypeObject simd__vector_type = { /************************************ ** Protected Definitions ************************************/ 
-static simd_vector * -simd_vector_to_obj(simd_data data, simd_data_type vtype) +static PySIMDVectorObject * +PySIMDVector_FromData(simd_data data, simd_data_type dtype) { - const simd_data_info *info = simd_data_getinfo(vtype); + const simd_data_info *info = simd_data_getinfo(dtype); assert(info->is_vector && info->nlanes > 0); - simd_vector *vec = PyObject_New(simd_vector, &simd__vector_type); + PySIMDVectorObject *vec = PyObject_New(PySIMDVectorObject, &PySIMDVectorType); if (vec == NULL) { - return (simd_vector*)PyErr_NoMemory(); + return (PySIMDVectorObject*)PyErr_NoMemory(); } - vec->type = vtype; + vec->dtype = dtype; if (info->is_bool) { // boolean vectors are internally treated as unsigned // vectors to add compatibility among all SIMD extensions - switch(vtype) { + switch(dtype) { case simd_data_vb8: data.vu8 = npyv_cvt_u8_b8(data.vb8); break; @@ -128,24 +119,24 @@ simd_vector_to_obj(simd_data data, simd_data_type vtype) } static simd_data -simd_vector_from_obj(simd_vector *vec, simd_data_type vtype) +PySIMDVector_AsData(PySIMDVectorObject *vec, simd_data_type dtype) { - const simd_data_info *info = simd_data_getinfo(vtype); + const simd_data_info *info = simd_data_getinfo(dtype); assert(info->is_vector && info->nlanes > 0); simd_data data = {.u64 = 0}; if (!PyObject_IsInstance( - (PyObject *)vec, (PyObject *)&simd__vector_type + (PyObject *)vec, (PyObject *)&PySIMDVectorType )) { PyErr_Format(PyExc_TypeError, "a vector type %s is required", info->pyname ); return data; } - if (vec->type != vtype) { + if (vec->dtype != dtype) { PyErr_Format(PyExc_TypeError, "a vector type %s is required, got(%s)", - info->pyname, simd_data_getinfo(vec->type)->pyname + info->pyname, simd_data_getinfo(vec->dtype)->pyname ); return data; } @@ -154,7 +145,7 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype) if (info->is_bool) { // boolean vectors are internally treated as unsigned // vectors to add compatibility among all SIMD extensions - switch(vtype) { + 
switch(dtype) { case simd_data_vb8: data.vb8 = npyv_cvt_b8_u8(data.vu8); break; @@ -172,14 +163,14 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype) } static int -simd_vector_register(PyObject *module) +PySIMDVectorType_Init(PyObject *module) { - Py_INCREF(&simd__vector_type); - if (PyType_Ready(&simd__vector_type)) { + Py_INCREF(&PySIMDVectorType); + if (PyType_Ready(&PySIMDVectorType)) { return -1; } if (PyModule_AddObject( - module, "vector_type",(PyObject *)&simd__vector_type + module, "vector_type",(PyObject *)&PySIMDVectorType )) { return -1; } diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py index 3ca6b068d..77a636491 100644 --- a/numpy/core/tests/test_simd.py +++ b/numpy/core/tests/test_simd.py @@ -3,7 +3,7 @@ import pytest from numpy.core._simd import targets -class _Test_Utility(object): +class _Test_Utility: # submodule of the desired SIMD extention, e.g. targets["AVX512F"] npyv = None # the current data type suffix e.g. 's8' @@ -14,10 +14,7 @@ class _Test_Utility(object): To call NPV intrinsics without the prefix 'npyv_' and auto suffixing intrinsics according to class attribute 'sfx' """ - nattr = getattr(self.npyv, attr + "_" + self.sfx) - if callable(nattr): - return lambda *args: nattr(*args) - return nattr + return getattr(self.npyv, attr + "_" + self.sfx) def _data(self, n=None, reverse=False): """ @@ -87,7 +84,7 @@ class _SIMD_INT(_Test_Utility): """ def test_operators_shift(self): if self.sfx in ("u8", "s8"): - pytest.skip("there are no shift intrinsics for npyv_" + self.sfx) + return data_a = self._data(self._int_max() - self.nlanes) data_b = self._data(self._int_min(), reverse=True) @@ -113,7 +110,7 @@ class _SIMD_INT(_Test_Utility): def test_arithmetic_subadd_saturated(self): if self.sfx in ("u32", "s32", "u64", "s64"): - pytest.skip("there are no saturated add/sub intrinsics for npyv_" + self.sfx) + return data_a = self._data(self._int_max() - self.nlanes) data_b = self._data(self._int_min(), 
reverse=True) @@ -362,7 +359,7 @@ class _SIMD_ALL(_Test_Utility): def test_arithmetic_mul(self): if self.sfx in ("u64", "s64"): - pytest.skip("there is no multiplication intrinsic for npyv_" + self.sfx) + return if self._is_fp(): data_a = self._data() @@ -377,7 +374,7 @@ class _SIMD_ALL(_Test_Utility): def test_arithmetic_div(self): if not self._is_fp(): - pytest.skip("there is no division intrinsic for npyv_" + self.sfx) + return data_a, data_b = self._data(), self._data(reverse=True) vdata_a, vdata_b = self.load(data_a), self.load(data_b) @@ -387,17 +384,18 @@ class _SIMD_ALL(_Test_Utility): div = self.div(vdata_a, vdata_b) assert div == data_div + int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64") fp_sfx = ("f32", "f64") all_sfx = int_sfx + fp_sfx tests_registery = { - int_sfx : "_SIMD_INT", - fp_sfx : "_SIMD_FP", - all_sfx : "_SIMD_ALL" + int_sfx : _SIMD_INT, + fp_sfx : _SIMD_FP, + all_sfx : _SIMD_ALL } -for name, npyv in targets.items(): +for target_name, npyv in targets.items(): simd_width = npyv.simd if npyv else '' - pretty_name = name.split('__') # multi-target separator + pretty_name = target_name.split('__') # multi-target separator if len(pretty_name) > 1: # multi-target pretty_name = f"({' '.join(pretty_name)})" @@ -413,14 +411,12 @@ for name, npyv in targets.items(): elif not npyv.simd_f64: skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision" - for sfxes, class_name in tests_registery.items(): + for sfxes, cls in tests_registery.items(): for sfx in sfxes: skip_m = skip_sfx.get(sfx, skip) + inhr = (cls,) + attr = dict(npyv=targets[target_name], sfx=sfx) + tcls = type(f"Test{cls.__name__}_{simd_width}_{target_name}_{sfx}", inhr, attr) if skip_m: - skip_m = '@pytest.mark.skip(reason="%s")' % skip_m - exec( - f"{skip_m}\n" - f"class Test{class_name}_{simd_width}_{name}_{sfx}({class_name}):\n" - f" npyv = targets['{name}']\n" - f" sfx = '{sfx}'\n" - ) + pytest.mark.skip(reason=skip_m)(tcls) + globals()[tcls.__name__] = 
tcls diff --git a/numpy/core/tests/test_simd_module.py b/numpy/core/tests/test_simd_module.py index 5bf82c0aa..3d710884a 100644 --- a/numpy/core/tests/test_simd_module.py +++ b/numpy/core/tests/test_simd_module.py @@ -1,16 +1,14 @@ import pytest from numpy.core._simd import targets - -npyv = None -npyv2 = None -for target_name, npyv_mod in targets.items(): - if npyv: - if npyv_mod and npyv_mod.simd: - npyv2 = npyv_mod - break - continue - if npyv_mod and npyv_mod.simd: - npyv = npyv_mod +""" +This testing unit only for checking the sanity of common functionality, +therefore all we need is just to take one submodule that represents any +of enabled SIMD extensions to run the test on it and the second submodule +required to run only one check related to the possibility of mixing +the data types among each submodule. +""" +npyvs = [npyv_mod for npyv_mod in targets.values() if npyv_mod and npyv_mod.simd] +npyv, npyv2 = (npyvs + [None, None])[:2] unsigned_sfx = ["u8", "u16", "u32", "u64"] signed_sfx = ["s8", "s16", "s32", "s64"] @@ -22,73 +20,68 @@ int_sfx = unsigned_sfx + signed_sfx all_sfx = unsigned_sfx + int_sfx @pytest.mark.skipif(not npyv, reason="could not find any SIMD extension with NPYV support") -class Test_SIMD_MODULE(object): - def test_num_lanes(self): - for sfx in all_sfx: - nlanes = getattr(npyv, "nlanes_" + sfx) - vector = getattr(npyv, "setall_" + sfx)(1) - assert len(vector) == nlanes +class Test_SIMD_MODULE: - def test_type_name(self): - for sfx in all_sfx: - vector = getattr(npyv, "setall_" + sfx)(1) - assert vector.__name__ == "npyv_" + sfx + @pytest.mark.parametrize('sfx', all_sfx) + def test_num_lanes(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + vector = getattr(npyv, "setall_" + sfx)(1) + assert len(vector) == nlanes - def test_raises(self): - def assert_raises(e, callback, *args): - __tracebackhide__ = True # Hide traceback for py.test - try: - callback(*args) - raise AssertionError("expected to raise " + e.__name__) - except e: - 
pass + @pytest.mark.parametrize('sfx', all_sfx) + def test_type_name(self, sfx): + vector = getattr(npyv, "setall_" + sfx)(1) + assert vector.__name__ == "npyv_" + sfx + def test_raises(self): a, b = [npyv.setall_u32(1)]*2 for sfx in all_sfx: vcb = lambda intrin: getattr(npyv, f"{intrin}_{sfx}") - assert_raises(TypeError, vcb("add"), a) - assert_raises(TypeError, vcb("add"), a, b, a) - assert_raises(TypeError, vcb("setall")) - assert_raises(TypeError, vcb("setall"), [1]) - assert_raises(TypeError, vcb("load"), 1) - assert_raises(ValueError, vcb("load"), [1]) - assert_raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a)) + pytest.raises(TypeError, vcb("add"), a) + pytest.raises(TypeError, vcb("add"), a, b, a) + pytest.raises(TypeError, vcb("setall")) + pytest.raises(TypeError, vcb("setall"), [1]) + pytest.raises(TypeError, vcb("load"), 1) + pytest.raises(ValueError, vcb("load"), [1]) + pytest.raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a)) + @pytest.mark.skipif(not npyv2, reason=( + "could not find a second SIMD extension with NPYV support" + )) + def test_nomix(self): # mix among submodules isn't allowed - if not npyv2: - return + a = npyv.setall_u32(1) a2 = npyv2.setall_u32(1) - assert_raises(TypeError, npyv.add_u32, a2, a2) - assert_raises(TypeError, npyv2.add_u32, a, a) + pytest.raises(TypeError, npyv.add_u32, a2, a2) + pytest.raises(TypeError, npyv2.add_u32, a, a) - def test_unsigned_overflow(self): - for sfx in unsigned_sfx: - nlanes = getattr(npyv, "nlanes_" + sfx) - hfbyte_len = int(sfx[1:])//4 - maxu = int(f"0x{'f'*hfbyte_len}", 16) - maxu_72 = 0xfffffffffffffffff - lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0] - assert lane == maxu - lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes) - assert lanes == [maxu] * nlanes - lane = getattr(npyv, "setall_" + sfx)(-1)[0] - assert lane == maxu - lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes) - assert lanes == [maxu] * nlanes + 
@pytest.mark.parametrize('sfx', unsigned_sfx) + def test_unsigned_overflow(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + maxu = (1 << int(sfx[1:])) - 1 + maxu_72 = (1 << 72) - 1 + lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0] + assert lane == maxu + lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes) + assert lanes == [maxu] * nlanes + lane = getattr(npyv, "setall_" + sfx)(-1)[0] + assert lane == maxu + lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes) + assert lanes == [maxu] * nlanes - def test_signed_overflow(self): - for sfx in signed_sfx: - nlanes = getattr(npyv, "nlanes_" + sfx) - maxs_72 = 0x7fffffffffffffffff - lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0] - assert lane == -1 - lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes) - assert lanes == [-1] * nlanes - mins_72 = -0x80000000000000000 - lane = getattr(npyv, "setall_" + sfx)(mins_72)[0] - assert lane == 0 - lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes) - assert lanes == [0] * nlanes + @pytest.mark.parametrize('sfx', signed_sfx) + def test_signed_overflow(self, sfx): + nlanes = getattr(npyv, "nlanes_" + sfx) + maxs_72 = (1 << 71) - 1 + lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0] + assert lane == -1 + lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes) + assert lanes == [-1] * nlanes + mins_72 = -1 << 71 + lane = getattr(npyv, "setall_" + sfx)(mins_72)[0] + assert lane == 0 + lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes) + assert lanes == [0] * nlanes def test_truncate_f32(self): f32 = npyv.setall_f32(0.1)[0] diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py index 6025586cd..a4fda537d 100644 --- a/numpy/distutils/command/build.py +++ b/numpy/distutils/command/build.py @@ -47,7 +47,7 @@ class build(old_build): - not part of dispatch-able features(--cpu-dispatch) - not supported by compiler or platform """ - self.simd_test = "BASELINE SSE2 SSE41 SSE42 XOP (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD" 
+ self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD" def finalize_options(self): build_scripts = self.build_scripts |