summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
authorSayed Adel <seiko@imavr.com>2020-09-15 16:06:14 +0200
committerSayed Adel <seiko@imavr.com>2020-10-27 11:46:58 +0000
commit7d125fb70cb149207171b7181312f9679dd4d451 (patch)
tree8c6cbd3b0dd140160d96b98794f3284857681c82 /numpy
parente19f7a8cfe59226fed6cc7a7da9e881218364d49 (diff)
downloadnumpy-7d125fb70cb149207171b7181312f9679dd4d451.tar.gz
MAINT, TST: Several improvements to _SIMD module
- use plain variables - clean up aligned allocate - use `PyArg_ParseTuple` for empty args - use `Py_ssize_t` instead of `unsigned` and `size_t` - improve coding style - no need for a custom raises assertions - use parametrize instead of inner loops - leave a comment about nature of mode testing unit - shift to get max/min of int72 - add more info to repr of vector object - get rid of exec() and use type() instead - use `.inc` as extension for sub-headers instead of `.h` - add `FMA4` and drop `SSE41` from _SIMD targets Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
Diffstat (limited to 'numpy')
-rw-r--r--numpy/core/setup.py12
-rw-r--r--numpy/core/src/_simd/_simd.dispatch.c.src34
-rw-r--r--numpy/core/src/_simd/_simd_arg.inc (renamed from numpy/core/src/_simd/_simd_inc_arg.h)39
-rw-r--r--numpy/core/src/_simd/_simd_convert.inc (renamed from numpy/core/src/_simd/_simd_inc_convert.h)73
-rw-r--r--numpy/core/src/_simd/_simd_data.inc.src (renamed from numpy/core/src/_simd/_simd_inc_data.h.src)10
-rw-r--r--numpy/core/src/_simd/_simd_easyintrin.inc (renamed from numpy/core/src/_simd/_simd_inc_easyintrin.h)133
-rw-r--r--numpy/core/src/_simd/_simd_inc.h.src107
-rw-r--r--numpy/core/src/_simd/_simd_vector.inc (renamed from numpy/core/src/_simd/_simd_inc_vector.h)87
-rw-r--r--numpy/core/tests/test_simd.py40
-rw-r--r--numpy/core/tests/test_simd_module.py127
-rw-r--r--numpy/distutils/command/build.py2
11 files changed, 323 insertions, 341 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index e9a9a4e46..68aa0a851 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -983,18 +983,18 @@ def configuration(parent_package='',top_path=None):
join('src', 'common', 'npy_cpu_features.c.src'),
join('src', '_simd', '_simd.c'),
join('src', '_simd', '_simd_inc.h.src'),
- join('src', '_simd', '_simd_inc_data.h.src'),
+ join('src', '_simd', '_simd_data.inc.src'),
join('src', '_simd', '_simd.dispatch.c.src'),
], depends=[
join('src', 'common', 'npy_cpu_dispatch.h'),
join('src', 'common', 'simd', 'simd.h'),
join('src', '_simd', '_simd.h'),
join('src', '_simd', '_simd_inc.h.src'),
- join('src', '_simd', '_simd_inc_data.h.src'),
- join('src', '_simd', '_simd_inc_arg.h'),
- join('src', '_simd', '_simd_inc_convert.h'),
- join('src', '_simd', '_simd_inc_easyintrin.h'),
- join('src', '_simd', '_simd_inc_vector.h'),
+ join('src', '_simd', '_simd_data.inc.src'),
+ join('src', '_simd', '_simd_arg.inc'),
+ join('src', '_simd', '_simd_convert.inc'),
+ join('src', '_simd', '_simd_easyintrin.inc'),
+ join('src', '_simd', '_simd_vector.inc'),
])
config.add_subpackage('tests')
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index a776ba37b..1989be7e3 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -3,11 +3,11 @@
#include "_simd_inc.h"
#if NPY_SIMD
-#include "_simd_inc_data.h"
-#include "_simd_inc_convert.h"
-#include "_simd_inc_vector.h"
-#include "_simd_inc_arg.h"
-#include "_simd_inc_easyintrin.h"
+#include "_simd_data.inc"
+#include "_simd_convert.inc"
+#include "_simd_vector.inc"
+#include "_simd_arg.inc"
+#include "_simd_easyintrin.inc"
/*************************************************************************
* Defining NPYV intrinsics as module functions
@@ -39,26 +39,24 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, q@sfx@)
static PyObject *
simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
{
- simd_arg req_args[] = {
- {.dtype = simd_data_q@sfx@},
- {.dtype = simd_data_v@sfx@},
- };
+ simd_arg seq_arg = {.dtype = simd_data_q@sfx@};
+ simd_arg vec_arg = {.dtype = simd_data_v@sfx@};
if (!PyArg_ParseTuple(
args, "O&O&:@intrin@_@sfx@",
- simd_arg_converter, &req_args[0],
- simd_arg_converter, &req_args[1]
+ simd_arg_converter, &seq_arg,
+ simd_arg_converter, &vec_arg
)) {
return NULL;
}
npyv_@intrin@_@sfx@(
- req_args[0].data.q@sfx@, req_args[1].data.v@sfx@
+ seq_arg.data.q@sfx@, vec_arg.data.v@sfx@
);
// write-back
- if (simd_sequence_fill_obj(req_args[0].obj, req_args[0].data.q@sfx@, simd_data_q@sfx@)) {
- simd_args_sequence_free(req_args, 2);
+ if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.q@sfx@, simd_data_q@sfx@)) {
+ simd_arg_free(&seq_arg);
return NULL;
}
- simd_args_sequence_free(req_args, 2);
+ simd_arg_free(&seq_arg);
Py_RETURN_NONE;
}
/**end repeat1**/
@@ -89,7 +87,7 @@ SIMD_IMPL_INTRIN_1(reinterpret_@sfx_to@_@sfx@, v@sfx_to@, v@sfx@)
static PyObject *
simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
{
- npyv_lanetype_@sfx@ *data = simd_sequence_from_obj(args, simd_data_q@sfx@, npyv_nlanes_@sfx@);
+ npyv_lanetype_@sfx@ *data = simd_sequence_from_iterable(args, simd_data_q@sfx@, npyv_nlanes_@sfx@);
if (data == NULL) {
return NULL;
}
@@ -105,7 +103,7 @@ simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
data[64] // for setf
)};
simd_sequence_free(data);
- return (PyObject*)simd_vector_to_obj(r, simd_data_v@sfx@);
+ return (PyObject*)PySIMDVector_FromData(r, simd_data_v@sfx@);
}
/**end repeat1**/
@@ -354,7 +352,7 @@ NPY_CPU_DISPATCH_CURFX(simd_create_module)(void)
goto err;
}
#if NPY_SIMD
- if (simd_vector_register(m)) {
+ if (PySIMDVectorType_Init(m)) {
goto err;
}
/**begin repeat
diff --git a/numpy/core/src/_simd/_simd_inc_arg.h b/numpy/core/src/_simd/_simd_arg.inc
index eabf49c5f..f5bcf5487 100644
--- a/numpy/core/src/_simd/_simd_inc_arg.h
+++ b/numpy/core/src/_simd/_simd_arg.inc
@@ -1,7 +1,9 @@
-#if !NPY_SIMD
- #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
/************************************
** Protected Definitions
************************************/
@@ -11,17 +13,17 @@ simd_arg_from_obj(PyObject *obj, simd_arg *arg)
assert(arg->dtype != 0);
const simd_data_info *info = simd_data_getinfo(arg->dtype);
if (info->is_scalar) {
- arg->data = simd_scalar_from_obj(obj, arg->dtype);
+ arg->data = simd_scalar_from_number(obj, arg->dtype);
}
else if (info->is_sequence) {
unsigned min_seq_size = simd_data_getinfo(info->to_vector)->nlanes;
- arg->data.qu8 = simd_sequence_from_obj(obj, arg->dtype, min_seq_size);
+ arg->data.qu8 = simd_sequence_from_iterable(obj, arg->dtype, min_seq_size);
}
else if (info->is_vectorx) {
- arg->data = simd_vectorx_from_obj(obj, arg->dtype);
+ arg->data = simd_vectorx_from_tuple(obj, arg->dtype);
}
else if (info->is_vector) {
- arg->data = simd_vector_from_obj((simd_vector*)obj, arg->dtype);
+ arg->data = PySIMDVector_AsData((PySIMDVectorObject*)obj, arg->dtype);
} else {
arg->data.u64 = 0;
PyErr_Format(PyExc_RuntimeError,
@@ -41,16 +43,16 @@ simd_arg_to_obj(const simd_arg *arg)
assert(arg->dtype != 0);
const simd_data_info *info = simd_data_getinfo(arg->dtype);
if (info->is_scalar) {
- return simd_scalar_to_obj(arg->data, arg->dtype);
+ return simd_scalar_to_number(arg->data, arg->dtype);
}
if (info->is_sequence) {
- return simd_sequence_to_obj(arg->data.qu8, arg->dtype);
+ return simd_sequence_to_list(arg->data.qu8, arg->dtype);
}
if (info->is_vectorx) {
- return simd_vectorx_to_obj(arg->data, arg->dtype);
+ return simd_vectorx_to_tuple(arg->data, arg->dtype);
}
if (info->is_vector) {
- return (PyObject*)simd_vector_to_obj(arg->data, arg->dtype);
+ return (PyObject*)PySIMDVector_FromData(arg->data, arg->dtype);
}
PyErr_Format(PyExc_RuntimeError,
"unhandled arg to object type id:%d, name:%s", arg->dtype, info->pyname
@@ -59,15 +61,10 @@ simd_arg_to_obj(const simd_arg *arg)
}
static void
-simd_args_sequence_free(simd_arg *args, int args_len)
+simd_arg_free(simd_arg *arg)
{
- assert(args_len > 0);
- while (--args_len >= 0) {
- simd_arg *arg = &args[args_len];
- const simd_data_info *info = simd_data_getinfo(arg->dtype);
- if (!info->is_sequence) {
- continue;
- }
+ const simd_data_info *info = simd_data_getinfo(arg->dtype);
+ if (info->is_sequence) {
simd_sequence_free(arg->data.qu8);
}
}
@@ -82,7 +79,7 @@ simd_arg_converter(PyObject *obj, simd_arg *arg)
arg->obj = obj;
return Py_CLEANUP_SUPPORTED;
} else {
- simd_args_sequence_free(arg, 1);
+ simd_arg_free(arg);
}
return 1;
}
diff --git a/numpy/core/src/_simd/_simd_inc_convert.h b/numpy/core/src/_simd/_simd_convert.inc
index 360101247..f5bfc3f50 100644
--- a/numpy/core/src/_simd/_simd_inc_convert.h
+++ b/numpy/core/src/_simd/_simd_convert.inc
@@ -1,12 +1,14 @@
-#if !NPY_SIMD
- #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
/************************************
** Protected Definitions
************************************/
static simd_data
-simd_scalar_from_obj(PyObject *obj, simd_data_type dtype)
+simd_scalar_from_number(PyObject *obj, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
assert(info->is_scalar && info->lane_size > 0);
@@ -23,7 +25,7 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype)
}
static PyObject *
-simd_scalar_to_obj(simd_data data, simd_data_type dtype)
+simd_scalar_to_number(simd_data data, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
assert(info->is_scalar && info->lane_size > 0);
@@ -41,43 +43,44 @@ simd_scalar_to_obj(simd_data data, simd_data_type dtype)
return PyLong_FromUnsignedLongLong(data.u64 >> leftb);
}
+typedef struct {
+ Py_ssize_t len;
+ void *ptr;
+} simd__alloc_data;
+
static void *
simd_sequence_new(Py_ssize_t len, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
- assert(info->is_sequence && info->lane_size > 0);
-
- size_t size = NPY_SIMD_WIDTH + sizeof(size_t) + sizeof(size_t*);
- size += len * info->lane_size;
-
- size_t *ptr = malloc(size);
+ assert(len > 0 && info->is_sequence && info->lane_size > 0);
+ size_t size = sizeof(simd__alloc_data) + len * info->lane_size + NPY_SIMD_WIDTH;
+ void *ptr = malloc(size);
if (ptr == NULL) {
return PyErr_NoMemory();
}
- *(ptr++) = len;
- size_t **a_ptr = (size_t**)(
- ((size_t)ptr + NPY_SIMD_WIDTH) & ~(size_t)(NPY_SIMD_WIDTH-1)
+ // align the pointer
+ simd__alloc_data *a_ptr = (simd__alloc_data *)(
+ ((uintptr_t)ptr + sizeof(simd__alloc_data) + NPY_SIMD_WIDTH) & ~(uintptr_t)(NPY_SIMD_WIDTH-1)
);
- a_ptr[-1] = ptr;
+ a_ptr[-1].len = len;
+ a_ptr[-1].ptr = ptr;
return a_ptr;
}
-static size_t
-simd_sequence_len(const void *ptr)
+static Py_ssize_t
+simd_sequence_len(void const *ptr)
{
- size_t *ptrz = ((size_t**)ptr)[-1];
- return *(ptrz-1);
+ return ((simd__alloc_data const*)ptr)[-1].len;
}
static void
simd_sequence_free(void *ptr)
{
- size_t *ptrz = ((size_t**)ptr)[-1];
- free(ptrz-1);
+ free(((simd__alloc_data *)ptr)[-1].ptr);
}
static void *
-simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
+simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size)
{
const simd_data_info *info = simd_data_getinfo(dtype);
assert(info->is_sequence && info->lane_size > 0);
@@ -86,7 +89,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
return NULL;
}
Py_ssize_t seq_size = PySequence_Fast_GET_SIZE(seq_obj);
- if (seq_size < (Py_ssize_t)min_size) {
+ if (seq_size < min_size) {
PyErr_Format(PyExc_ValueError,
"minimum acceptable size of the required sequence is %d, given(%d)",
min_size, seq_size
@@ -99,7 +102,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
}
PyObject **seq_items = PySequence_Fast_ITEMS(seq_obj);
for (Py_ssize_t i = 0; i < seq_size; ++i) {
- simd_data data = simd_scalar_from_obj(seq_items[i], info->to_scalar);
+ simd_data data = simd_scalar_from_number(seq_items[i], info->to_scalar);
npyv_lanetype_u8 *sdst = dst + i * info->lane_size;
memcpy(sdst, &data.u64, info->lane_size);
}
@@ -113,7 +116,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
}
static int
-simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
+simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
if (!PySequence_Check(obj)) {
@@ -123,12 +126,12 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
return -1;
}
const npyv_lanetype_u8 *src = ptr;
- Py_ssize_t seq_len = (Py_ssize_t)simd_sequence_len(ptr);
+ Py_ssize_t seq_len = simd_sequence_len(ptr);
for (Py_ssize_t i = 0; i < seq_len; ++i) {
const npyv_lanetype_u8 *ssrc = src + i * info->lane_size;
simd_data data;
memcpy(&data.u64, ssrc, info->lane_size);
- PyObject *item = simd_scalar_to_obj(data, info->to_scalar);
+ PyObject *item = simd_scalar_to_number(data, info->to_scalar);
if (item == NULL) {
return -1;
}
@@ -141,13 +144,13 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
}
static PyObject *
-simd_sequence_to_obj(const void *ptr, simd_data_type dtype)
+simd_sequence_to_list(const void *ptr, simd_data_type dtype)
{
- PyObject *list = PyList_New((Py_ssize_t)simd_sequence_len(ptr));
+ PyObject *list = PyList_New(simd_sequence_len(ptr));
if (list == NULL) {
return NULL;
}
- if (simd_sequence_fill_obj(list, ptr, dtype) < 0) {
+ if (simd_sequence_fill_iterable(list, ptr, dtype) < 0) {
Py_DECREF(list);
return NULL;
}
@@ -155,7 +158,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype)
}
static simd_data
-simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
+simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
// NPYV currently only supports x2 and x3
@@ -172,7 +175,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
for (int i = 0; i < info->is_vectorx; ++i) {
PyObject *item = PyTuple_GET_ITEM(obj, i);
// get the max multi-vec and let the compiler do the rest
- data.vu64x3.val[i] = simd_vector_from_obj((simd_vector*)item, info->to_vector).vu64;
+ data.vu64x3.val[i] = PySIMDVector_AsData((PySIMDVectorObject*)item, info->to_vector).vu64;
if (PyErr_Occurred()) {
return data;
}
@@ -181,7 +184,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
}
static PyObject *
-simd_vectorx_to_obj(simd_data data, simd_data_type dtype)
+simd_vectorx_to_tuple(simd_data data, simd_data_type dtype)
{
const simd_data_info *info = simd_data_getinfo(dtype);
// NPYV currently only supports x2 and x3
@@ -194,7 +197,7 @@ simd_vectorx_to_obj(simd_data data, simd_data_type dtype)
for (int i = 0; i < info->is_vectorx; ++i) {
// get the max multi-vector and let the compiler handle the rest
simd_data vdata = {.vu64 = data.vu64x3.val[i]};
- PyObject *item = (PyObject*)simd_vector_to_obj(vdata, info->to_vector);
+ PyObject *item = (PyObject*)PySIMDVector_FromData(vdata, info->to_vector);
if (item == NULL) {
// TODO: improve log add item number
Py_DECREF(tuple);
diff --git a/numpy/core/src/_simd/_simd_inc_data.h.src b/numpy/core/src/_simd/_simd_data.inc.src
index eefac483b..5c796487c 100644
--- a/numpy/core/src/_simd/_simd_inc_data.h.src
+++ b/numpy/core/src/_simd/_simd_data.inc.src
@@ -1,7 +1,9 @@
-#if !NPY_SIMD
- #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
/************************************
** Private Definitions
************************************/
diff --git a/numpy/core/src/_simd/_simd_inc_easyintrin.h b/numpy/core/src/_simd/_simd_easyintrin.inc
index 9ff227999..54e7ccf01 100644
--- a/numpy/core/src/_simd/_simd_inc_easyintrin.h
+++ b/numpy/core/src/_simd/_simd_easyintrin.inc
@@ -1,33 +1,19 @@
-#if !NPY_SIMD
- #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
#define SIMD_INTRIN_DEF(NAME) \
{ NPY_TOSTRING(NAME), simd__intrin_##NAME, METH_VARARGS, NULL } , // comma
-static int simd__no_arguments(PyObject *args, const char* method_name)
-{
- if (args == NULL) {
- return 0;
- }
- assert(PyTuple_Check(args));
- Py_ssize_t obj_arg_len = PyTuple_GET_SIZE(args);
- if (obj_arg_len != 0) {
- PyErr_Format(PyExc_RuntimeError,
- "%s(), takes no arguments, given(%d)", method_name, obj_arg_len
- );
- return -1;
- }
- return 0;
-}
-
#define SIMD_IMPL_INTRIN_0(NAME, RET) \
static PyObject *simd__intrin_##NAME \
(PyObject* NPY_UNUSED(self), PyObject *args) \
{ \
- if (simd__no_arguments( \
- args, NPY_TOSTRING(NAME) \
- )) return NULL; \
+ if (!PyArg_ParseTuple( \
+ args, ":" NPY_TOSTRING(NAME)) \
+ ) return NULL; \
simd_arg a = { \
.dtype = simd_data_##RET, \
.data = {.RET = npyv_##NAME()}, \
@@ -39,9 +25,9 @@ static int simd__no_arguments(PyObject *args, const char* method_name)
static PyObject *simd__intrin_##NAME \
(PyObject* NPY_UNUSED(self), PyObject *args) \
{ \
- if (simd__no_arguments( \
- args, NPY_TOSTRING(NAME) \
- )) return NULL; \
+ if (!PyArg_ParseTuple( \
+ args, ":" NPY_TOSTRING(NAME)) \
+ ) return NULL; \
npyv_##NAME(); \
Py_RETURN_NONE; \
}
@@ -55,88 +41,87 @@ static int simd__no_arguments(PyObject *args, const char* method_name)
args, "O&:"NPY_TOSTRING(NAME), \
simd_arg_converter, &arg \
)) return NULL; \
- simd_data r = {.RET = npyv_##NAME( \
+ simd_data data = {.RET = npyv_##NAME( \
arg.data.IN0 \
)}; \
- simd_args_sequence_free(&arg, 1); \
- arg.data = r; \
- arg.dtype = simd_data_##RET; \
- return simd_arg_to_obj(&arg); \
+ simd_arg_free(&arg); \
+ simd_arg ret = { \
+ .data = data, .dtype = simd_data_##RET \
+ }; \
+ return simd_arg_to_obj(&ret); \
}
#define SIMD_IMPL_INTRIN_2(NAME, RET, IN0, IN1) \
static PyObject *simd__intrin_##NAME \
(PyObject* NPY_UNUSED(self), PyObject *args) \
{ \
- simd_arg req_args[] = { \
- {.dtype = simd_data_##IN0}, \
- {.dtype = simd_data_##IN1}, \
- }; \
+ simd_arg arg1 = {.dtype = simd_data_##IN0}; \
+ simd_arg arg2 = {.dtype = simd_data_##IN1}; \
if (!PyArg_ParseTuple( \
args, "O&O&:"NPY_TOSTRING(NAME), \
- simd_arg_converter, &req_args[0], \
- simd_arg_converter, &req_args[1] \
+ simd_arg_converter, &arg1, \
+ simd_arg_converter, &arg2 \
)) return NULL; \
- simd_data r = {.RET = npyv_##NAME( \
- req_args[0].data.IN0, \
- req_args[1].data.IN1 \
+ simd_data data = {.RET = npyv_##NAME( \
+ arg1.data.IN0, arg2.data.IN1 \
)}; \
- simd_args_sequence_free(req_args, 2); \
- req_args[0].data = r; \
- req_args[0].dtype = simd_data_##RET; \
- return simd_arg_to_obj(req_args); \
+ simd_arg_free(&arg1); \
+ simd_arg_free(&arg2); \
+ simd_arg ret = { \
+ .data = data, .dtype = simd_data_##RET \
+ }; \
+ return simd_arg_to_obj(&ret); \
}
#define SIMD__REPEAT_2IMM(C, NAME, IN0) \
- C == req_args[1].data.u8 ? NPY_CAT(npyv_, NAME)(req_args[0].data.IN0, C) :
+ C == arg2.data.u8 ? NPY_CAT(npyv_, NAME)(arg1.data.IN0, C) :
#define SIMD_IMPL_INTRIN_2IMM(NAME, RET, IN0, CONST_RNG) \
static PyObject *simd__intrin_##NAME \
(PyObject* NPY_UNUSED(self), PyObject *args) \
{ \
- simd_arg req_args[] = { \
- {.dtype = simd_data_##IN0}, \
- {.dtype = simd_data_u8}, \
- }; \
+ simd_arg arg1 = {.dtype = simd_data_##IN0}; \
+ simd_arg arg2 = {.dtype = simd_data_u8}; \
if (!PyArg_ParseTuple( \
args, "O&O&:"NPY_TOSTRING(NAME), \
- simd_arg_converter, &req_args[0], \
- simd_arg_converter, &req_args[1] \
+ simd_arg_converter, &arg1, \
+ simd_arg_converter, &arg2 \
)) return NULL; \
- simd_data r; \
- r.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)( \
+ simd_data data; \
+ data.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)( \
SIMD__REPEAT_2IMM, NAME, IN0 \
- ) npyv_##NAME(req_args[0].data.IN0, 0); \
- simd_args_sequence_free(req_args, 2); \
- req_args[0].data = r; \
- req_args[0].dtype = simd_data_##RET; \
- return simd_arg_to_obj(req_args); \
+ ) npyv_##NAME(arg1.data.IN0, 0); \
+ simd_arg_free(&arg1); \
+ simd_arg ret = { \
+ .data = data, .dtype = simd_data_##RET \
+ }; \
+ return simd_arg_to_obj(&ret); \
}
#define SIMD_IMPL_INTRIN_3(NAME, RET, IN0, IN1, IN2) \
static PyObject *simd__intrin_##NAME \
(PyObject* NPY_UNUSED(self), PyObject *args) \
{ \
- simd_arg req_args[] = { \
- {.dtype = simd_data_##IN0}, \
- {.dtype = simd_data_##IN1}, \
- {.dtype = simd_data_##IN2}, \
- }; \
+ simd_arg arg1 = {.dtype = simd_data_##IN0}; \
+ simd_arg arg2 = {.dtype = simd_data_##IN1}; \
+ simd_arg arg3 = {.dtype = simd_data_##IN2}; \
if (!PyArg_ParseTuple( \
args, "O&O&O&:"NPY_TOSTRING(NAME), \
- simd_arg_converter, &req_args[0], \
- simd_arg_converter, &req_args[1], \
- simd_arg_converter, &req_args[2] \
+ simd_arg_converter, &arg1, \
+ simd_arg_converter, &arg2, \
+ simd_arg_converter, &arg3 \
)) return NULL; \
- simd_data r = {.RET = npyv_##NAME( \
- req_args[0].data.IN0, \
- req_args[1].data.IN1, \
- req_args[2].data.IN2 \
+ simd_data data = {.RET = npyv_##NAME( \
+ arg1.data.IN0, arg2.data.IN1, \
+ arg3.data.IN2 \
)}; \
- simd_args_sequence_free(req_args, 3); \
- req_args[0].data = r; \
- req_args[0].dtype = simd_data_##RET; \
- return simd_arg_to_obj(req_args); \
+ simd_arg_free(&arg1); \
+ simd_arg_free(&arg2); \
+ simd_arg_free(&arg3); \
+ simd_arg ret = { \
+ .data = data, .dtype = simd_data_##RET \
+ }; \
+ return simd_arg_to_obj(&ret); \
}
/**
* Helper macros for repeating and expand a certain macro.
diff --git a/numpy/core/src/_simd/_simd_inc.h.src b/numpy/core/src/_simd/_simd_inc.h.src
index 530c7a736..9858fc0dc 100644
--- a/numpy/core/src/_simd/_simd_inc.h.src
+++ b/numpy/core/src/_simd/_simd_inc.h.src
@@ -1,6 +1,6 @@
-/**
- * This header works only through '_simd.dispatch.c'
- */
+#ifndef _SIMD_SIMD_INC_H_
+#define _SIMD_SIMD_INC_H_
+
#include <Python.h>
#include "simd/simd.h"
@@ -147,29 +147,47 @@ typedef struct
{
PyObject_HEAD
// vector type id
- simd_data_type type;
+ simd_data_type dtype;
// vector data, aligned for safe casting
npyv_lanetype_u8 NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) data[NPY_SIMD_WIDTH];
-} simd_vector;
+} PySIMDVectorObject;
/**
- * convert simd_data to PyObject(simd_vector),
- * raise Python exception on failure and returns NULL.
+ * Create a Python obj(PySIMDVectorObject) from a NPYV vector based on the contents
+ * of `data`(simd_data) and according to the vector data type `dtype`
+ * on range(simd_data_[vu8:vf64]).
+ * Return NULL and a Python exception on failure, otherwise new reference.
+ *
+ * Example:
+ ** simd_data data = {.vu8 = npyv_setall_u8(0xff)};
+ ** PySIMDVectorObject *obj = PySIMDVector_FromData(data, simd_data_vu8);
+ ** if (obj != NULL) {
+ ** printf("I have a valid vector obj and first element is \n", obj->data[0]);
+ ** Py_DECREF(obj);
+ ** }
*/
-static simd_vector *
-simd_vector_to_obj(simd_data data, simd_data_type vtype);
+static PySIMDVectorObject *
+PySIMDVector_FromData(simd_data data, simd_data_type dtype);
/**
- * convert PyObject(simd_vector) to simd_data,
- * raise Python exception on failure.
+ * Return a NPYV vector(simd_data) representation of `obj`(PySIMDVectorObject) and
+ * according to the vector data type `dtype` on range (simd_data_[vu8:vf64]).
+ * Raise a Python exception on failure.
+ *
+ * Example:
+ ** simd_data data = PySIMDVector_AsData(vec_obj, simd_data_vf32);
+ ** if (!PyErr_Occurred()) {
+ ** npyv_f32 add_1 = npyv_add_f32(data.vf32, npyv_setall_f32(1));
+ ** ...
+ ** }
*/
static simd_data
-simd_vector_from_obj(simd_vector *vec, simd_data_type vtype);
+PySIMDVector_AsData(PySIMDVectorObject *obj, simd_data_type dtype);
/**
- * initialize and register vector type(PyTypeObject) to PyModule,
- * vector type can be reached through attribute 'vector_type'.
+ * initialize and register PySIMDVectorType to certain PyModule,
+ * PySIMDVectorType can be reached through attribute 'vector_type'.
* return -1 on error, 0 on success.
*/
static int
-simd_vector_register(PyObject *module);
+PySIMDVectorType_Init(PyObject *module);
/************************************
** Declarations (inc_convert)
@@ -180,13 +198,13 @@ simd_vector_register(PyObject *module);
* Raise a Python exception on failure.
*
* Example:
- ** simd_data data = simd_scalar_from_obj(obj, simd_data_f32);
+ ** simd_data data = simd_scalar_from_number(obj, simd_data_f32);
** if (!PyErr_Occurred()) {
** printf("I have a valid float %d\n", data.f32);
** }
*/
static simd_data
-simd_scalar_from_obj(PyObject *obj, simd_data_type dtype);
+simd_scalar_from_number(PyObject *obj, simd_data_type dtype);
/**
* Create a Python scalar from a C scalar based on the contents
* of `data`(simd_data) and according to the scalar data type `dtype`
@@ -195,14 +213,14 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype);
*
* Example:
** simd_data data = {.u32 = 0x7fffffff};
- ** PyObject *obj = simd_scalar_to_obj(data, simd_data_s32);
+ ** PyObject *obj = simd_scalar_to_number(data, simd_data_s32);
** if (obj != NULL) {
** printf("I have a valid Python integer %d\n", PyLong_AsLong(obj));
** Py_DECREF(obj);
** }
*/
static PyObject *
-simd_scalar_to_obj(simd_data data, simd_data_type dtype);
+simd_scalar_to_number(simd_data data, simd_data_type dtype);
/**
* Allocate a C array in memory according to number of elements `len`
* and sequence data type `dtype` on range(simd_data_[qu8:qf64]).
@@ -223,13 +241,13 @@ static void *
simd_sequence_new(Py_ssize_t len, simd_data_type dtype);
/**
* Return the number of elements of the allocated C array `ptr`
- * by `simd_sequence_new()` or `simd_sequence_from_obj()`.
+ * by `simd_sequence_new()` or `simd_sequence_from_iterable()`.
*/
-static size_t
+static Py_ssize_t
simd_sequence_len(const void *ptr);
/**
* Free the allocated C array by `simd_sequence_new()` or
- * `simd_sequence_from_obj()`.
+ * `simd_sequence_from_iterable()`.
*/
static void
simd_sequence_free(void *ptr);
@@ -243,7 +261,7 @@ simd_sequence_free(void *ptr);
* with a Python exception on failure.
*
* Example:
- ** npyv_lanetype_u32 *ptr = simd_sequence_from_obj(seq_obj, simd_data_qu32, npyv_nlanes_u32);
+ ** npyv_lanetype_u32 *ptr = simd_sequence_from_iterable(seq_obj, simd_data_qu32, npyv_nlanes_u32);
** if (ptr != NULL) {
** npyv_u32 a = npyv_load_u32(ptr);
** ...
@@ -252,25 +270,25 @@ simd_sequence_free(void *ptr);
**
*/
static void *
-simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size);
+simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size);
/**
* Fill a Python sequence object `obj` with a C array `ptr` allocated by
- * `simd_sequence_new()` or `simd_sequence_from_obj()` according to
+ * `simd_sequence_new()` or `simd_sequence_from_iterable()` according to
* to the sequence data type `dtype` on range (simd_data_[qu8:qf64]).
*
* Return 0 on success or -1 with a Python exception on failure.
*/
static int
-simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype);
+simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype);
/**
* Create a Python list from a C array `ptr` allocated by
- * `simd_sequence_new()` or `simd_sequence_from_obj()` according to
+ * `simd_sequence_new()` or `simd_sequence_from_iterable()` according to
* to the sequence data type `dtype` on range (simd_data_[qu8:qf64]).
*
* Return NULL and a Python exception on failure, otherwise new reference.
*/
static PyObject *
-simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
+simd_sequence_to_list(const void *ptr, simd_data_type dtype);
/**
* Return a SIMD multi-vector(simd_data) representation of Python tuple of
* (simd_vector*,) `obj` according to the scalar data type `dtype`
@@ -279,7 +297,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
* Raise a Python exception on failure.
*
* Example:
- ** simd_data data = simd_vectorx_from_obj(tuple_obj, simd_data_vf32x2);
+ ** simd_data data = simd_vectorx_from_tuple(tuple_obj, simd_data_vf32x2);
** if (!PyErr_Occurred()) {
** npyv_f32 sum = npyv_add_f32(data.vf32x2.val[0], data.vf32x2.val[1]);
** ...
@@ -287,7 +305,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
**
*/
static simd_data
-simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype);
+simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype);
/**
* Create a Python tuple of 'simd_vector' from a SIMD multi-vector
* based on the contents of `data`(simd_data) and according to
@@ -297,7 +315,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype);
* Return NULL and a Python exception on failure, otherwise new reference.
*/
static PyObject *
-simd_vectorx_to_obj(simd_data data, simd_data_type dtype);
+simd_vectorx_to_tuple(simd_data data, simd_data_type dtype);
/************************************
** Declarations (inc_arg)
@@ -320,7 +338,7 @@ typedef struct
* Return -1 and raise Python exception on failure, otherwise return 0.
*
* Notes:
- * - requires `simd_args_sequence_free()` or `simd_sequence_free()`
+ * - requires `simd_arg_free()` or `simd_sequence_free()`
* to free allocated C array, in case of sequence data types.
* - the number of minimum acceptable elements for sequence data
* types is the number of lanes of the equivalent vector data type.
@@ -333,7 +351,7 @@ typedef struct
** }
** npyv_u8 v_u8 = npyv_load_u8(arg->data.qu8);
** ...
- ** simd_args_sequence_free(&arg, 1);
+ ** simd_arg_free(&arg);
*
* Example #2:
** simd_arg arg = {.dtype = simd_data_vf32};
@@ -369,36 +387,35 @@ simd_arg_to_obj(const simd_arg *arg);
* used with PyArg_Parse*().
*
* Notes:
- * - requires `simd_args_sequence_free()` or `simd_sequence_free()`
+ * - requires `simd_arg_free()` or `simd_sequence_free()`
* to free allocated C array, in case of sequence data types.
* - the number of minimum acceptable elements for sequence data
* types is the number of lanes of the equivalent vector data type.
* - use 'arg->obj' to retrieve the parameter obj.
*
* Example:
- ** simd_arg req_args[] = {
- ** {.dtype = simd_data_qf32},
- ** {.dtype = simd_data_vf32},
- ** };
+ ** simd_arg seq_f32 = {.dtype = simd_data_qf32};
+ ** simd_arg vec_f32 = {.dtype = simd_data_vf32};
** if (!PyArg_ParseTuple(
** args, "O&O&:add_sum_f32",
- ** simd_arg_converter, &req_args[0],
- ** simd_arg_converter, &req_args[1]
+ ** simd_arg_converter, &seq_f32,
+ ** simd_arg_converter, &vec_f32
** )) {
** // fail
** return;
** }
- ** npyv_f32 load_a = npyv_load_f32(req_args[0].data.qf32);
- ** npyv_f32 sum = npyv_add_f32(load_a, req_args[1].data.vf32);
+ ** npyv_f32 load_a = npyv_load_f32(seq_f32.data.qf32);
+ ** npyv_f32 sum = npyv_add_f32(load_a, vec_f32.data.vf32);
** ...
- ** simd_args_sequence_free(req_args, 2);
+ ** simd_arg_free(&seq_f32);
*/
static int
simd_arg_converter(PyObject *obj, simd_arg *arg);
/**
- * Free the allocated C array for sequence data types.
+ * Free the allocated C array, if the arg holds a sequence data type.
*/
static void
-simd_args_sequence_free(simd_arg *args, int args_len);
+simd_arg_free(simd_arg *arg);
#endif // NPY_SIMD
+#endif // _SIMD_SIMD_INC_H_
diff --git a/numpy/core/src/_simd/_simd_inc_vector.h b/numpy/core/src/_simd/_simd_vector.inc
index b0fa17b9a..2a1378f22 100644
--- a/numpy/core/src/_simd/_simd_inc_vector.h
+++ b/numpy/core/src/_simd/_simd_vector.inc
@@ -1,63 +1,54 @@
-#if !NPY_SIMD
- #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
/************************************
** Private Definitions
************************************/
-// PySequenceMethods
static Py_ssize_t
-simd__vector_length(simd_vector *self)
+simd__vector_length(PySIMDVectorObject *self)
{
- return simd_data_getinfo(self->type)->nlanes;
+ return simd_data_getinfo(self->dtype)->nlanes;
}
static PyObject *
-simd__vector_item(simd_vector *self, Py_ssize_t i)
+simd__vector_item(PySIMDVectorObject *self, Py_ssize_t i)
{
- const simd_data_info *info = simd_data_getinfo(self->type);
+ const simd_data_info *info = simd_data_getinfo(self->dtype);
int nlanes = info->nlanes;
if (i >= nlanes) {
- PyErr_SetString(PyExc_IndexError, "list index out of range");
+ PyErr_SetString(PyExc_IndexError, "vector index out of range");
return NULL;
}
npyv_lanetype_u8 *src = self->data + i * info->lane_size;
simd_data data;
memcpy(&data.u64, src, info->lane_size);
- return simd_scalar_to_obj(data, info->to_scalar);
+ return simd_scalar_to_number(data, info->to_scalar);
}
static PySequenceMethods simd__vector_as_sequence = {
- (lenfunc) simd__vector_length, /* sq_length */
- (binaryfunc) NULL, /* sq_concat */
- (ssizeargfunc) NULL, /* sq_repeat */
- (ssizeargfunc) simd__vector_item, /* sq_item */
- (ssizessizeargfunc) NULL, /* sq_slice */
- (ssizeobjargproc) NULL, /* sq_ass_item */
- (ssizessizeobjargproc) NULL, /* sq_ass_slice */
- (objobjproc) NULL, /* sq_contains */
- (binaryfunc) NULL, /* sq_inplace_concat */
- (ssizeargfunc) NULL, /* sq_inplace_repeat */
+ .sq_length = (lenfunc) simd__vector_length,
+ .sq_item = (ssizeargfunc) simd__vector_item
};
-// PyGetSetDef
static PyObject *
-simd__vector_name(simd_vector *self)
+simd__vector_name(PySIMDVectorObject *self)
{
- return PyUnicode_FromString(simd_data_getinfo(self->type)->pyname);
+ return PyUnicode_FromString(simd_data_getinfo(self->dtype)->pyname);
}
static PyGetSetDef simd__vector_getset[] = {
{ "__name__", (getter)simd__vector_name, NULL, NULL, NULL },
{ NULL, NULL, NULL, NULL, NULL }
};
-// PyTypeObject(simd__vector_type)
static PyObject *
-simd__vector_repr(PyObject *self)
+simd__vector_repr(PySIMDVectorObject *self)
{
- // PySequence_Fast returns Tuple in PyPy
- PyObject *obj = PySequence_List(self);
+ PyObject *obj = PySequence_List((PyObject*)self);
if (obj != NULL) {
- PyObject *repr = PyObject_Str(obj);
+ const char *type_name = simd_data_getinfo(self->dtype)->pyname;
+ PyObject *repr = PyUnicode_FromFormat("<%s of %R>", type_name, obj);
Py_DECREF(obj);
return repr;
}
@@ -81,10 +72,10 @@ simd__vector_compare(PyObject *self, PyObject *other, int cmp_op)
}
return obj;
}
-static PyTypeObject simd__vector_type = {
+static PyTypeObject PySIMDVectorType = {
PyVarObject_HEAD_INIT(NULL, 0)
.tp_name = NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(VECTOR)),
- .tp_basicsize = sizeof(simd_vector),
+ .tp_basicsize = sizeof(PySIMDVectorObject),
.tp_repr = (reprfunc)simd__vector_repr,
.tp_as_sequence = &simd__vector_as_sequence,
.tp_flags = Py_TPFLAGS_DEFAULT,
@@ -95,21 +86,21 @@ static PyTypeObject simd__vector_type = {
/************************************
** Protected Definitions
************************************/
-static simd_vector *
-simd_vector_to_obj(simd_data data, simd_data_type vtype)
+static PySIMDVectorObject *
+PySIMDVector_FromData(simd_data data, simd_data_type dtype)
{
- const simd_data_info *info = simd_data_getinfo(vtype);
+ const simd_data_info *info = simd_data_getinfo(dtype);
assert(info->is_vector && info->nlanes > 0);
- simd_vector *vec = PyObject_New(simd_vector, &simd__vector_type);
+ PySIMDVectorObject *vec = PyObject_New(PySIMDVectorObject, &PySIMDVectorType);
if (vec == NULL) {
- return (simd_vector*)PyErr_NoMemory();
+ return (PySIMDVectorObject*)PyErr_NoMemory();
}
- vec->type = vtype;
+ vec->dtype = dtype;
if (info->is_bool) {
// boolean vectors are internally treated as unsigned
// vectors to add compatibility among all SIMD extensions
- switch(vtype) {
+ switch(dtype) {
case simd_data_vb8:
data.vu8 = npyv_cvt_u8_b8(data.vb8);
break;
@@ -128,24 +119,24 @@ simd_vector_to_obj(simd_data data, simd_data_type vtype)
}
static simd_data
-simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
+PySIMDVector_AsData(PySIMDVectorObject *vec, simd_data_type dtype)
{
- const simd_data_info *info = simd_data_getinfo(vtype);
+ const simd_data_info *info = simd_data_getinfo(dtype);
assert(info->is_vector && info->nlanes > 0);
simd_data data = {.u64 = 0};
if (!PyObject_IsInstance(
- (PyObject *)vec, (PyObject *)&simd__vector_type
+ (PyObject *)vec, (PyObject *)&PySIMDVectorType
)) {
PyErr_Format(PyExc_TypeError,
"a vector type %s is required", info->pyname
);
return data;
}
- if (vec->type != vtype) {
+ if (vec->dtype != dtype) {
PyErr_Format(PyExc_TypeError,
"a vector type %s is required, got(%s)",
- info->pyname, simd_data_getinfo(vec->type)->pyname
+ info->pyname, simd_data_getinfo(vec->dtype)->pyname
);
return data;
}
@@ -154,7 +145,7 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
if (info->is_bool) {
// boolean vectors are internally treated as unsigned
// vectors to add compatibility among all SIMD extensions
- switch(vtype) {
+ switch(dtype) {
case simd_data_vb8:
data.vb8 = npyv_cvt_b8_u8(data.vu8);
break;
@@ -172,14 +163,14 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
}
static int
-simd_vector_register(PyObject *module)
+PySIMDVectorType_Init(PyObject *module)
{
- Py_INCREF(&simd__vector_type);
- if (PyType_Ready(&simd__vector_type)) {
+ Py_INCREF(&PySIMDVectorType);
+ if (PyType_Ready(&PySIMDVectorType)) {
return -1;
}
if (PyModule_AddObject(
- module, "vector_type",(PyObject *)&simd__vector_type
+ module, "vector_type",(PyObject *)&PySIMDVectorType
)) {
return -1;
}
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 3ca6b068d..77a636491 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -3,7 +3,7 @@
import pytest
from numpy.core._simd import targets
-class _Test_Utility(object):
+class _Test_Utility:
# submodule of the desired SIMD extension, e.g. targets["AVX512F"]
npyv = None
# the current data type suffix e.g. 's8'
@@ -14,10 +14,7 @@ class _Test_Utility(object):
To call NPV intrinsics without the prefix 'npyv_' and
auto suffixing intrinsics according to class attribute 'sfx'
"""
- nattr = getattr(self.npyv, attr + "_" + self.sfx)
- if callable(nattr):
- return lambda *args: nattr(*args)
- return nattr
+ return getattr(self.npyv, attr + "_" + self.sfx)
def _data(self, n=None, reverse=False):
"""
@@ -87,7 +84,7 @@ class _SIMD_INT(_Test_Utility):
"""
def test_operators_shift(self):
if self.sfx in ("u8", "s8"):
- pytest.skip("there are no shift intrinsics for npyv_" + self.sfx)
+ return
data_a = self._data(self._int_max() - self.nlanes)
data_b = self._data(self._int_min(), reverse=True)
@@ -113,7 +110,7 @@ class _SIMD_INT(_Test_Utility):
def test_arithmetic_subadd_saturated(self):
if self.sfx in ("u32", "s32", "u64", "s64"):
- pytest.skip("there are no saturated add/sub intrinsics for npyv_" + self.sfx)
+ return
data_a = self._data(self._int_max() - self.nlanes)
data_b = self._data(self._int_min(), reverse=True)
@@ -362,7 +359,7 @@ class _SIMD_ALL(_Test_Utility):
def test_arithmetic_mul(self):
if self.sfx in ("u64", "s64"):
- pytest.skip("there is no multiplication intrinsic for npyv_" + self.sfx)
+ return
if self._is_fp():
data_a = self._data()
@@ -377,7 +374,7 @@ class _SIMD_ALL(_Test_Utility):
def test_arithmetic_div(self):
if not self._is_fp():
- pytest.skip("there is no division intrinsic for npyv_" + self.sfx)
+ return
data_a, data_b = self._data(), self._data(reverse=True)
vdata_a, vdata_b = self.load(data_a), self.load(data_b)
@@ -387,17 +384,18 @@ class _SIMD_ALL(_Test_Utility):
div = self.div(vdata_a, vdata_b)
assert div == data_div
+
int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64")
fp_sfx = ("f32", "f64")
all_sfx = int_sfx + fp_sfx
tests_registery = {
- int_sfx : "_SIMD_INT",
- fp_sfx : "_SIMD_FP",
- all_sfx : "_SIMD_ALL"
+ int_sfx : _SIMD_INT,
+ fp_sfx : _SIMD_FP,
+ all_sfx : _SIMD_ALL
}
-for name, npyv in targets.items():
+for target_name, npyv in targets.items():
simd_width = npyv.simd if npyv else ''
- pretty_name = name.split('__') # multi-target separator
+ pretty_name = target_name.split('__') # multi-target separator
if len(pretty_name) > 1:
# multi-target
pretty_name = f"({' '.join(pretty_name)})"
@@ -413,14 +411,12 @@ for name, npyv in targets.items():
elif not npyv.simd_f64:
skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision"
- for sfxes, class_name in tests_registery.items():
+ for sfxes, cls in tests_registery.items():
for sfx in sfxes:
skip_m = skip_sfx.get(sfx, skip)
+ inhr = (cls,)
+ attr = dict(npyv=targets[target_name], sfx=sfx)
+ tcls = type(f"Test{cls.__name__}_{simd_width}_{target_name}_{sfx}", inhr, attr)
if skip_m:
- skip_m = '@pytest.mark.skip(reason="%s")' % skip_m
- exec(
- f"{skip_m}\n"
- f"class Test{class_name}_{simd_width}_{name}_{sfx}({class_name}):\n"
- f" npyv = targets['{name}']\n"
- f" sfx = '{sfx}'\n"
- )
+ pytest.mark.skip(reason=skip_m)(tcls)
+ globals()[tcls.__name__] = tcls
diff --git a/numpy/core/tests/test_simd_module.py b/numpy/core/tests/test_simd_module.py
index 5bf82c0aa..3d710884a 100644
--- a/numpy/core/tests/test_simd_module.py
+++ b/numpy/core/tests/test_simd_module.py
@@ -1,16 +1,14 @@
import pytest
from numpy.core._simd import targets
-
-npyv = None
-npyv2 = None
-for target_name, npyv_mod in targets.items():
- if npyv:
- if npyv_mod and npyv_mod.simd:
- npyv2 = npyv_mod
- break
- continue
- if npyv_mod and npyv_mod.simd:
- npyv = npyv_mod
+"""
+This testing unit only for checking the sanity of common functionality,
+therefore all we need is just to take one submodule that represents any
+of enabled SIMD extensions to run the test on it and the second submodule
+required to run only one check related to the possibility of mixing
+the data types among each submodule.
+"""
+npyvs = [npyv_mod for npyv_mod in targets.values() if npyv_mod and npyv_mod.simd]
+npyv, npyv2 = (npyvs + [None, None])[:2]
unsigned_sfx = ["u8", "u16", "u32", "u64"]
signed_sfx = ["s8", "s16", "s32", "s64"]
@@ -22,73 +20,68 @@ int_sfx = unsigned_sfx + signed_sfx
all_sfx = unsigned_sfx + int_sfx
@pytest.mark.skipif(not npyv, reason="could not find any SIMD extension with NPYV support")
-class Test_SIMD_MODULE(object):
- def test_num_lanes(self):
- for sfx in all_sfx:
- nlanes = getattr(npyv, "nlanes_" + sfx)
- vector = getattr(npyv, "setall_" + sfx)(1)
- assert len(vector) == nlanes
+class Test_SIMD_MODULE:
- def test_type_name(self):
- for sfx in all_sfx:
- vector = getattr(npyv, "setall_" + sfx)(1)
- assert vector.__name__ == "npyv_" + sfx
+ @pytest.mark.parametrize('sfx', all_sfx)
+ def test_num_lanes(self, sfx):
+ nlanes = getattr(npyv, "nlanes_" + sfx)
+ vector = getattr(npyv, "setall_" + sfx)(1)
+ assert len(vector) == nlanes
- def test_raises(self):
- def assert_raises(e, callback, *args):
- __tracebackhide__ = True # Hide traceback for py.test
- try:
- callback(*args)
- raise AssertionError("expected to raise " + e.__name__)
- except e:
- pass
+ @pytest.mark.parametrize('sfx', all_sfx)
+ def test_type_name(self, sfx):
+ vector = getattr(npyv, "setall_" + sfx)(1)
+ assert vector.__name__ == "npyv_" + sfx
+ def test_raises(self):
a, b = [npyv.setall_u32(1)]*2
for sfx in all_sfx:
vcb = lambda intrin: getattr(npyv, f"{intrin}_{sfx}")
- assert_raises(TypeError, vcb("add"), a)
- assert_raises(TypeError, vcb("add"), a, b, a)
- assert_raises(TypeError, vcb("setall"))
- assert_raises(TypeError, vcb("setall"), [1])
- assert_raises(TypeError, vcb("load"), 1)
- assert_raises(ValueError, vcb("load"), [1])
- assert_raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a))
+ pytest.raises(TypeError, vcb("add"), a)
+ pytest.raises(TypeError, vcb("add"), a, b, a)
+ pytest.raises(TypeError, vcb("setall"))
+ pytest.raises(TypeError, vcb("setall"), [1])
+ pytest.raises(TypeError, vcb("load"), 1)
+ pytest.raises(ValueError, vcb("load"), [1])
+ pytest.raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a))
+ @pytest.mark.skipif(not npyv2, reason=(
+ "could not find a second SIMD extension with NPYV support"
+ ))
+ def test_nomix(self):
# mix among submodules isn't allowed
- if not npyv2:
- return
+ a = npyv.setall_u32(1)
a2 = npyv2.setall_u32(1)
- assert_raises(TypeError, npyv.add_u32, a2, a2)
- assert_raises(TypeError, npyv2.add_u32, a, a)
+ pytest.raises(TypeError, npyv.add_u32, a2, a2)
+ pytest.raises(TypeError, npyv2.add_u32, a, a)
- def test_unsigned_overflow(self):
- for sfx in unsigned_sfx:
- nlanes = getattr(npyv, "nlanes_" + sfx)
- hfbyte_len = int(sfx[1:])//4
- maxu = int(f"0x{'f'*hfbyte_len}", 16)
- maxu_72 = 0xfffffffffffffffff
- lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0]
- assert lane == maxu
- lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes)
- assert lanes == [maxu] * nlanes
- lane = getattr(npyv, "setall_" + sfx)(-1)[0]
- assert lane == maxu
- lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes)
- assert lanes == [maxu] * nlanes
+ @pytest.mark.parametrize('sfx', unsigned_sfx)
+ def test_unsigned_overflow(self, sfx):
+ nlanes = getattr(npyv, "nlanes_" + sfx)
+ maxu = (1 << int(sfx[1:])) - 1
+ maxu_72 = (1 << 72) - 1
+ lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0]
+ assert lane == maxu
+ lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes)
+ assert lanes == [maxu] * nlanes
+ lane = getattr(npyv, "setall_" + sfx)(-1)[0]
+ assert lane == maxu
+ lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes)
+ assert lanes == [maxu] * nlanes
- def test_signed_overflow(self):
- for sfx in signed_sfx:
- nlanes = getattr(npyv, "nlanes_" + sfx)
- maxs_72 = 0x7fffffffffffffffff
- lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0]
- assert lane == -1
- lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes)
- assert lanes == [-1] * nlanes
- mins_72 = -0x80000000000000000
- lane = getattr(npyv, "setall_" + sfx)(mins_72)[0]
- assert lane == 0
- lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes)
- assert lanes == [0] * nlanes
+ @pytest.mark.parametrize('sfx', signed_sfx)
+ def test_signed_overflow(self, sfx):
+ nlanes = getattr(npyv, "nlanes_" + sfx)
+ maxs_72 = (1 << 71) - 1
+ lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0]
+ assert lane == -1
+ lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes)
+ assert lanes == [-1] * nlanes
+ mins_72 = -1 << 71
+ lane = getattr(npyv, "setall_" + sfx)(mins_72)[0]
+ assert lane == 0
+ lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes)
+ assert lanes == [0] * nlanes
def test_truncate_f32(self):
f32 = npyv.setall_f32(0.1)[0]
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index 6025586cd..a4fda537d 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,7 @@ class build(old_build):
- not part of dispatch-able features(--cpu-dispatch)
- not supported by compiler or platform
"""
- self.simd_test = "BASELINE SSE2 SSE41 SSE42 XOP (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+ self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
def finalize_options(self):
build_scripts = self.build_scripts