MAINT, TST: Several improvements to _SIMD module

- use plain variables - clean up aligned allocate - use `PyArg_ParseTuple` for empty args - use `Py_ssize_t` instead of `unsigned` and `size_t` - improve coding style - no need for a custom raises assertions - use parametrize instead of inner loops - leave a comment about nature of mode testing unit - shift to get max/min of int72 - add more info to repr of vector object - get rid of exec() and use type() instead - use `.inc` as extension for sub-headers instead of `.h` - add `FMA4` and drop `SSE41` from _SIMD targets Co-authored-by: Eric Wieser <wieser.eric@gmail.com>
author: Sayed Adel <seiko@imavr.com> 2020-09-15 16:06:14 +0200
committer: Sayed Adel <seiko@imavr.com> 2020-10-27 11:46:58 +0000
commit: 7d125fb70cb149207171b7181312f9679dd4d451 (patch)
tree: 8c6cbd3b0dd140160d96b98794f3284857681c82 /numpy
parent: e19f7a8cfe59226fed6cc7a7da9e881218364d49 (diff)
download: numpy-7d125fb70cb149207171b7181312f9679dd4d451.tar.gz
11 files changed, 323 insertions, 341 deletions
diff --git a/numpy/core/setup.py b/numpy/core/setup.py
index e9a9a4e46..68aa0a851 100644
--- a/numpy/core/setup.py
+++ b/numpy/core/setup.py
@@ -983,18 +983,18 @@ def configuration(parent_package='',top_path=None):
         join('src', 'common', 'npy_cpu_features.c.src'),
         join('src', '_simd', '_simd.c'),
         join('src', '_simd', '_simd_inc.h.src'),
-        join('src', '_simd', '_simd_inc_data.h.src'),
+        join('src', '_simd', '_simd_data.inc.src'),
         join('src', '_simd', '_simd.dispatch.c.src'),
     ], depends=[
         join('src', 'common', 'npy_cpu_dispatch.h'),
         join('src', 'common', 'simd', 'simd.h'),
         join('src', '_simd', '_simd.h'),
         join('src', '_simd', '_simd_inc.h.src'),
-        join('src', '_simd', '_simd_inc_data.h.src'),
-        join('src', '_simd', '_simd_inc_arg.h'),
-        join('src', '_simd', '_simd_inc_convert.h'),
-        join('src', '_simd', '_simd_inc_easyintrin.h'),
-        join('src', '_simd', '_simd_inc_vector.h'),
+        join('src', '_simd', '_simd_data.inc.src'),
+        join('src', '_simd', '_simd_arg.inc'),
+        join('src', '_simd', '_simd_convert.inc'),
+        join('src', '_simd', '_simd_easyintrin.inc'),
+        join('src', '_simd', '_simd_vector.inc'),
     ])
 
     config.add_subpackage('tests')
diff --git a/numpy/core/src/_simd/_simd.dispatch.c.src b/numpy/core/src/_simd/_simd.dispatch.c.src
index a776ba37b..1989be7e3 100644
--- a/numpy/core/src/_simd/_simd.dispatch.c.src
+++ b/numpy/core/src/_simd/_simd.dispatch.c.src
@@ -3,11 +3,11 @@
 #include "_simd_inc.h"
 
 #if NPY_SIMD
-#include "_simd_inc_data.h"
-#include "_simd_inc_convert.h"
-#include "_simd_inc_vector.h"
-#include "_simd_inc_arg.h"
-#include "_simd_inc_easyintrin.h"
+#include "_simd_data.inc"
+#include "_simd_convert.inc"
+#include "_simd_vector.inc"
+#include "_simd_arg.inc"
+#include "_simd_easyintrin.inc"
 
 /*************************************************************************
  * Defining NPYV intrinsics as module functions
@@ -39,26 +39,24 @@ SIMD_IMPL_INTRIN_1(@intrin@_@sfx@, v@sfx@, q@sfx@)
 static PyObject *
 simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
 {
-    simd_arg req_args[] = {
-        {.dtype = simd_data_q@sfx@},
-        {.dtype = simd_data_v@sfx@},
-    };
+    simd_arg seq_arg = {.dtype = simd_data_q@sfx@};
+    simd_arg vec_arg = {.dtype = simd_data_v@sfx@};
     if (!PyArg_ParseTuple(
         args, "O&O&:@intrin@_@sfx@",
-        simd_arg_converter, &req_args[0],
-        simd_arg_converter, &req_args[1]
+        simd_arg_converter, &seq_arg,
+        simd_arg_converter, &vec_arg
     )) {
         return NULL;
     }
     npyv_@intrin@_@sfx@(
-        req_args[0].data.q@sfx@, req_args[1].data.v@sfx@
+        seq_arg.data.q@sfx@, vec_arg.data.v@sfx@
     );
     // write-back
-    if (simd_sequence_fill_obj(req_args[0].obj, req_args[0].data.q@sfx@, simd_data_q@sfx@)) {
-        simd_args_sequence_free(req_args, 2);
+    if (simd_sequence_fill_iterable(seq_arg.obj, seq_arg.data.q@sfx@, simd_data_q@sfx@)) {
+        simd_arg_free(&seq_arg);
         return NULL;
     }
-    simd_args_sequence_free(req_args, 2);
+    simd_arg_free(&seq_arg);
     Py_RETURN_NONE;
 }
 /**end repeat1**/
@@ -89,7 +87,7 @@ SIMD_IMPL_INTRIN_1(reinterpret_@sfx_to@_@sfx@, v@sfx_to@, v@sfx@)
 static PyObject *
 simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
 {
-    npyv_lanetype_@sfx@ *data = simd_sequence_from_obj(args, simd_data_q@sfx@, npyv_nlanes_@sfx@);
+    npyv_lanetype_@sfx@ *data = simd_sequence_from_iterable(args, simd_data_q@sfx@, npyv_nlanes_@sfx@);
     if (data == NULL) {
         return NULL;
     }
@@ -105,7 +103,7 @@ simd__intrin_@intrin@_@sfx@(PyObject* NPY_UNUSED(self), PyObject *args)
         data[64] // for setf
     )};
     simd_sequence_free(data);
-    return (PyObject*)simd_vector_to_obj(r, simd_data_v@sfx@);
+    return (PyObject*)PySIMDVector_FromData(r, simd_data_v@sfx@);
 }
 /**end repeat1**/
 
@@ -354,7 +352,7 @@ NPY_CPU_DISPATCH_CURFX(simd_create_module)(void)
         goto err;
     }
 #if NPY_SIMD
-    if (simd_vector_register(m)) {
+    if (PySIMDVectorType_Init(m)) {
         goto err;
     }
     /**begin repeat
diff --git a/numpy/core/src/_simd/_simd_inc_arg.h b/numpy/core/src/_simd/_simd_arg.inc
index eabf49c5f..f5bcf5487 100644
--- a/numpy/core/src/_simd/_simd_inc_arg.h
+++ b/numpy/core/src/_simd/_simd_arg.inc
@@ -1,7 +1,9 @@
-#if !NPY_SIMD
-    #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
 /************************************
  ** Protected Definitions
  ************************************/
@@ -11,17 +13,17 @@ simd_arg_from_obj(PyObject *obj, simd_arg *arg)
     assert(arg->dtype != 0);
     const simd_data_info *info = simd_data_getinfo(arg->dtype);
     if (info->is_scalar) {
-        arg->data = simd_scalar_from_obj(obj, arg->dtype);
+        arg->data = simd_scalar_from_number(obj, arg->dtype);
     }
     else if (info->is_sequence) {
         unsigned min_seq_size = simd_data_getinfo(info->to_vector)->nlanes;
-        arg->data.qu8 = simd_sequence_from_obj(obj, arg->dtype, min_seq_size);
+        arg->data.qu8 = simd_sequence_from_iterable(obj, arg->dtype, min_seq_size);
     }
     else if (info->is_vectorx) {
-        arg->data = simd_vectorx_from_obj(obj, arg->dtype);
+        arg->data = simd_vectorx_from_tuple(obj, arg->dtype);
     }
     else if (info->is_vector) {
-        arg->data = simd_vector_from_obj((simd_vector*)obj, arg->dtype);
+        arg->data = PySIMDVector_AsData((PySIMDVectorObject*)obj, arg->dtype);
     } else {
         arg->data.u64 = 0;
         PyErr_Format(PyExc_RuntimeError,
@@ -41,16 +43,16 @@ simd_arg_to_obj(const simd_arg *arg)
     assert(arg->dtype != 0);
     const simd_data_info *info = simd_data_getinfo(arg->dtype);
     if (info->is_scalar) {
-        return simd_scalar_to_obj(arg->data, arg->dtype);
+        return simd_scalar_to_number(arg->data, arg->dtype);
     }
     if (info->is_sequence) {
-        return simd_sequence_to_obj(arg->data.qu8, arg->dtype);
+        return simd_sequence_to_list(arg->data.qu8, arg->dtype);
     }
     if (info->is_vectorx) {
-        return simd_vectorx_to_obj(arg->data, arg->dtype);
+        return simd_vectorx_to_tuple(arg->data, arg->dtype);
     }
     if (info->is_vector) {
-        return (PyObject*)simd_vector_to_obj(arg->data, arg->dtype);
+        return (PyObject*)PySIMDVector_FromData(arg->data, arg->dtype);
     }
     PyErr_Format(PyExc_RuntimeError,
         "unhandled arg to object type id:%d, name:%s", arg->dtype, info->pyname
@@ -59,15 +61,10 @@ simd_arg_to_obj(const simd_arg *arg)
 }
 
 static void
-simd_args_sequence_free(simd_arg *args, int args_len)
+simd_arg_free(simd_arg *arg)
 {
-    assert(args_len > 0);
-    while (--args_len >= 0) {
-        simd_arg *arg = &args[args_len];
-        const simd_data_info *info = simd_data_getinfo(arg->dtype);
-        if (!info->is_sequence) {
-            continue;
-        }
+    const simd_data_info *info = simd_data_getinfo(arg->dtype);
+    if (info->is_sequence) {
         simd_sequence_free(arg->data.qu8);
     }
 }
@@ -82,7 +79,7 @@ simd_arg_converter(PyObject *obj, simd_arg *arg)
         arg->obj = obj;
         return Py_CLEANUP_SUPPORTED;
     } else {
-        simd_args_sequence_free(arg, 1);
+        simd_arg_free(arg);
     }
     return 1;
 }
diff --git a/numpy/core/src/_simd/_simd_inc_convert.h b/numpy/core/src/_simd/_simd_convert.inc
index 360101247..f5bfc3f50 100644
--- a/numpy/core/src/_simd/_simd_inc_convert.h
+++ b/numpy/core/src/_simd/_simd_convert.inc
@@ -1,12 +1,14 @@
-#if !NPY_SIMD
-    #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
 /************************************
  ** Protected Definitions
  ************************************/
 static simd_data
-simd_scalar_from_obj(PyObject *obj, simd_data_type dtype)
+simd_scalar_from_number(PyObject *obj, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     assert(info->is_scalar && info->lane_size > 0);
@@ -23,7 +25,7 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype)
 }
 
 static PyObject *
-simd_scalar_to_obj(simd_data data, simd_data_type dtype)
+simd_scalar_to_number(simd_data data, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     assert(info->is_scalar && info->lane_size > 0);
@@ -41,43 +43,44 @@ simd_scalar_to_obj(simd_data data, simd_data_type dtype)
     return PyLong_FromUnsignedLongLong(data.u64 >> leftb);
 }
 
+typedef struct {
+    Py_ssize_t len;
+    void *ptr;
+} simd__alloc_data;
+
 static void *
 simd_sequence_new(Py_ssize_t len, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
-    assert(info->is_sequence && info->lane_size > 0);
-
-    size_t size  = NPY_SIMD_WIDTH + sizeof(size_t) + sizeof(size_t*);
-           size += len * info->lane_size;
-
-    size_t *ptr = malloc(size);
+    assert(len > 0 && info->is_sequence && info->lane_size > 0);
+    size_t size = sizeof(simd__alloc_data) + len * info->lane_size + NPY_SIMD_WIDTH;
+    void *ptr = malloc(size);
     if (ptr == NULL) {
         return PyErr_NoMemory();
     }
-    *(ptr++) = len;
-    size_t **a_ptr = (size_t**)(
-        ((size_t)ptr + NPY_SIMD_WIDTH) & ~(size_t)(NPY_SIMD_WIDTH-1)
+    // align the pointer
+    simd__alloc_data *a_ptr = (simd__alloc_data *)(
+        ((uintptr_t)ptr + sizeof(simd__alloc_data) + NPY_SIMD_WIDTH) & ~(uintptr_t)(NPY_SIMD_WIDTH-1)
     );
-    a_ptr[-1] = ptr;
+    a_ptr[-1].len = len;
+    a_ptr[-1].ptr = ptr;
     return a_ptr;
 }
 
-static size_t
-simd_sequence_len(const void *ptr)
+static Py_ssize_t
+simd_sequence_len(void const *ptr)
 {
-    size_t *ptrz = ((size_t**)ptr)[-1];
-    return *(ptrz-1);
+    return ((simd__alloc_data const*)ptr)[-1].len;
 }
 
 static void
 simd_sequence_free(void *ptr)
 {
-    size_t *ptrz = ((size_t**)ptr)[-1];
-    free(ptrz-1);
+    free(((simd__alloc_data *)ptr)[-1].ptr);
 }
 
 static void *
-simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
+simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     assert(info->is_sequence && info->lane_size > 0);
@@ -86,7 +89,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
         return NULL;
     }
     Py_ssize_t seq_size = PySequence_Fast_GET_SIZE(seq_obj);
-    if (seq_size < (Py_ssize_t)min_size) {
+    if (seq_size < min_size) {
         PyErr_Format(PyExc_ValueError,
             "minimum acceptable size of the required sequence is %d, given(%d)",
             min_size, seq_size
@@ -99,7 +102,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
     }
     PyObject **seq_items = PySequence_Fast_ITEMS(seq_obj);
     for (Py_ssize_t i = 0; i < seq_size; ++i) {
-        simd_data data = simd_scalar_from_obj(seq_items[i], info->to_scalar);
+        simd_data data = simd_scalar_from_number(seq_items[i], info->to_scalar);
         npyv_lanetype_u8 *sdst = dst + i * info->lane_size;
         memcpy(sdst, &data.u64, info->lane_size);
     }
@@ -113,7 +116,7 @@ simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size)
 }
 
 static int
-simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
+simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     if (!PySequence_Check(obj)) {
@@ -123,12 +126,12 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
         return -1;
     }
     const npyv_lanetype_u8 *src = ptr;
-    Py_ssize_t seq_len = (Py_ssize_t)simd_sequence_len(ptr);
+    Py_ssize_t seq_len = simd_sequence_len(ptr);
     for (Py_ssize_t i = 0; i < seq_len; ++i) {
         const npyv_lanetype_u8 *ssrc = src + i * info->lane_size;
         simd_data data;
         memcpy(&data.u64, ssrc, info->lane_size);
-        PyObject *item = simd_scalar_to_obj(data, info->to_scalar);
+        PyObject *item = simd_scalar_to_number(data, info->to_scalar);
         if (item == NULL) {
             return -1;
         }
@@ -141,13 +144,13 @@ simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype)
 }
 
 static PyObject *
-simd_sequence_to_obj(const void *ptr, simd_data_type dtype)
+simd_sequence_to_list(const void *ptr, simd_data_type dtype)
 {
-    PyObject *list = PyList_New((Py_ssize_t)simd_sequence_len(ptr));
+    PyObject *list = PyList_New(simd_sequence_len(ptr));
     if (list == NULL) {
         return NULL;
     }
-    if (simd_sequence_fill_obj(list, ptr, dtype) < 0) {
+    if (simd_sequence_fill_iterable(list, ptr, dtype) < 0) {
         Py_DECREF(list);
         return NULL;
     }
@@ -155,7 +158,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype)
 }
 
 static simd_data
-simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
+simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     // NPYV currently only supports x2 and x3
@@ -172,7 +175,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
     for (int i = 0; i < info->is_vectorx; ++i) {
         PyObject *item = PyTuple_GET_ITEM(obj, i);
         // get the max multi-vec and let the compiler do the rest
-        data.vu64x3.val[i] = simd_vector_from_obj((simd_vector*)item, info->to_vector).vu64;
+        data.vu64x3.val[i] = PySIMDVector_AsData((PySIMDVectorObject*)item, info->to_vector).vu64;
         if (PyErr_Occurred()) {
             return data;
         }
@@ -181,7 +184,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype)
 }
 
 static PyObject *
-simd_vectorx_to_obj(simd_data data, simd_data_type dtype)
+simd_vectorx_to_tuple(simd_data data, simd_data_type dtype)
 {
     const simd_data_info *info = simd_data_getinfo(dtype);
     // NPYV currently only supports x2 and x3
@@ -194,7 +197,7 @@ simd_vectorx_to_obj(simd_data data, simd_data_type dtype)
     for (int i = 0; i < info->is_vectorx; ++i) {
         // get the max multi-vector and let the compiler handle the rest
         simd_data vdata = {.vu64 = data.vu64x3.val[i]};
-        PyObject *item = (PyObject*)simd_vector_to_obj(vdata, info->to_vector);
+        PyObject *item = (PyObject*)PySIMDVector_FromData(vdata, info->to_vector);
         if (item == NULL) {
             // TODO: improve log add item number
             Py_DECREF(tuple);
diff --git a/numpy/core/src/_simd/_simd_inc_data.h.src b/numpy/core/src/_simd/_simd_data.inc.src
index eefac483b..5c796487c 100644
--- a/numpy/core/src/_simd/_simd_inc_data.h.src
+++ b/numpy/core/src/_simd/_simd_data.inc.src
@@ -1,7 +1,9 @@
-#if !NPY_SIMD
-    #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
 /************************************
  ** Private Definitions
  ************************************/
diff --git a/numpy/core/src/_simd/_simd_inc_easyintrin.h b/numpy/core/src/_simd/_simd_easyintrin.inc
index 9ff227999..54e7ccf01 100644
--- a/numpy/core/src/_simd/_simd_inc_easyintrin.h
+++ b/numpy/core/src/_simd/_simd_easyintrin.inc
@@ -1,33 +1,19 @@
-#if !NPY_SIMD
-    #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
 #define SIMD_INTRIN_DEF(NAME) \
     { NPY_TOSTRING(NAME), simd__intrin_##NAME, METH_VARARGS, NULL } , // comma
 
-static int simd__no_arguments(PyObject *args, const char* method_name)
-{
-    if (args == NULL) {
-        return 0;
-    }
-    assert(PyTuple_Check(args));
-    Py_ssize_t obj_arg_len = PyTuple_GET_SIZE(args);
-    if (obj_arg_len != 0) {
-        PyErr_Format(PyExc_RuntimeError,
-            "%s(), takes no arguments, given(%d)", method_name, obj_arg_len
-        );
-        return -1;
-    }
-    return 0;
-}
-
 #define SIMD_IMPL_INTRIN_0(NAME, RET)                     \
     static PyObject *simd__intrin_##NAME                  \
     (PyObject* NPY_UNUSED(self), PyObject *args)          \
     {                                                     \
-        if (simd__no_arguments(                           \
-            args, NPY_TOSTRING(NAME)                      \
-        )) return NULL;                                   \
+        if (!PyArg_ParseTuple(                            \
+            args, ":" NPY_TOSTRING(NAME))                 \
+        ) return NULL;                                    \
         simd_arg a = {                                    \
             .dtype = simd_data_##RET,                     \
             .data  = {.RET = npyv_##NAME()},              \
@@ -39,9 +25,9 @@ static int simd__no_arguments(PyObject *args, const char* method_name)
     static PyObject *simd__intrin_##NAME                  \
     (PyObject* NPY_UNUSED(self), PyObject *args)          \
     {                                                     \
-        if (simd__no_arguments(                           \
-            args, NPY_TOSTRING(NAME)                      \
-        )) return NULL;                                   \
+        if (!PyArg_ParseTuple(                            \
+            args, ":" NPY_TOSTRING(NAME))                 \
+        ) return NULL;                                    \
         npyv_##NAME();                                    \
         Py_RETURN_NONE;                                   \
     }
@@ -55,88 +41,87 @@ static int simd__no_arguments(PyObject *args, const char* method_name)
             args, "O&:"NPY_TOSTRING(NAME),                \
             simd_arg_converter, &arg                      \
         )) return NULL;                                   \
-        simd_data r = {.RET = npyv_##NAME(                \
+        simd_data data = {.RET = npyv_##NAME(             \
             arg.data.IN0                                  \
         )};                                               \
-        simd_args_sequence_free(&arg, 1);                 \
-        arg.data = r;                                     \
-        arg.dtype = simd_data_##RET;                      \
-        return simd_arg_to_obj(&arg);                     \
+        simd_arg_free(&arg);                              \
+        simd_arg ret = {                                  \
+            .data = data, .dtype = simd_data_##RET        \
+        };                                                \
+        return simd_arg_to_obj(&ret);                     \
     }
 
 #define SIMD_IMPL_INTRIN_2(NAME, RET, IN0, IN1)           \
     static PyObject *simd__intrin_##NAME                  \
     (PyObject* NPY_UNUSED(self), PyObject *args)          \
     {                                                     \
-        simd_arg req_args[] = {                           \
-            {.dtype = simd_data_##IN0},                   \
-            {.dtype = simd_data_##IN1},                   \
-        };                                                \
+        simd_arg arg1 = {.dtype = simd_data_##IN0};       \
+        simd_arg arg2 = {.dtype = simd_data_##IN1};       \
         if (!PyArg_ParseTuple(                            \
             args, "O&O&:"NPY_TOSTRING(NAME),              \
-            simd_arg_converter, &req_args[0],             \
-            simd_arg_converter, &req_args[1]              \
+            simd_arg_converter, &arg1,                    \
+            simd_arg_converter, &arg2                     \
         )) return NULL;                                   \
-        simd_data r = {.RET = npyv_##NAME(                \
-            req_args[0].data.IN0,                         \
-            req_args[1].data.IN1                          \
+        simd_data data = {.RET = npyv_##NAME(             \
+            arg1.data.IN0, arg2.data.IN1                  \
         )};                                               \
-        simd_args_sequence_free(req_args, 2);             \
-        req_args[0].data = r;                             \
-        req_args[0].dtype = simd_data_##RET;              \
-        return simd_arg_to_obj(req_args);                 \
+        simd_arg_free(&arg1);                             \
+        simd_arg_free(&arg2);                             \
+        simd_arg ret = {                                  \
+            .data = data, .dtype = simd_data_##RET        \
+        };                                                \
+        return simd_arg_to_obj(&ret);                     \
     }
 
 #define SIMD__REPEAT_2IMM(C, NAME, IN0) \
-    C == req_args[1].data.u8 ? NPY_CAT(npyv_, NAME)(req_args[0].data.IN0, C) :
+    C == arg2.data.u8 ? NPY_CAT(npyv_, NAME)(arg1.data.IN0, C) :
 
 #define SIMD_IMPL_INTRIN_2IMM(NAME, RET, IN0, CONST_RNG)  \
     static PyObject *simd__intrin_##NAME                  \
     (PyObject* NPY_UNUSED(self), PyObject *args)          \
     {                                                     \
-        simd_arg req_args[] = {                           \
-            {.dtype = simd_data_##IN0},                   \
-            {.dtype = simd_data_u8},                      \
-        };                                                \
+        simd_arg arg1 = {.dtype = simd_data_##IN0};       \
+        simd_arg arg2 = {.dtype = simd_data_u8};          \
         if (!PyArg_ParseTuple(                            \
             args, "O&O&:"NPY_TOSTRING(NAME),              \
-            simd_arg_converter, &req_args[0],             \
-            simd_arg_converter, &req_args[1]              \
+            simd_arg_converter, &arg1,                    \
+            simd_arg_converter, &arg2                     \
         )) return NULL;                                   \
-        simd_data r;                                      \
-        r.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)(    \
+        simd_data data;                                   \
+        data.RET = NPY_CAT(SIMD__IMPL_COUNT_, CONST_RNG)( \
             SIMD__REPEAT_2IMM, NAME, IN0                  \
-        ) npyv_##NAME(req_args[0].data.IN0, 0);           \
-        simd_args_sequence_free(req_args, 2);             \
-        req_args[0].data = r;                             \
-        req_args[0].dtype = simd_data_##RET;              \
-        return simd_arg_to_obj(req_args);                 \
+        ) npyv_##NAME(arg1.data.IN0, 0);                  \
+        simd_arg_free(&arg1);                             \
+        simd_arg ret = {                                  \
+            .data = data, .dtype = simd_data_##RET        \
+        };                                                \
+        return simd_arg_to_obj(&ret);                     \
     }
 
 #define SIMD_IMPL_INTRIN_3(NAME, RET, IN0, IN1, IN2)      \
     static PyObject *simd__intrin_##NAME                  \
     (PyObject* NPY_UNUSED(self), PyObject *args)          \
     {                                                     \
-        simd_arg req_args[] = {                           \
-            {.dtype = simd_data_##IN0},                   \
-            {.dtype = simd_data_##IN1},                   \
-            {.dtype = simd_data_##IN2},                   \
-        };                                                \
+        simd_arg arg1 = {.dtype = simd_data_##IN0};       \
+        simd_arg arg2 = {.dtype = simd_data_##IN1};       \
+        simd_arg arg3 = {.dtype = simd_data_##IN2};       \
         if (!PyArg_ParseTuple(                            \
             args, "O&O&O&:"NPY_TOSTRING(NAME),            \
-            simd_arg_converter, &req_args[0],             \
-            simd_arg_converter, &req_args[1],             \
-            simd_arg_converter, &req_args[2]              \
+            simd_arg_converter, &arg1,                    \
+            simd_arg_converter, &arg2,                    \
+            simd_arg_converter, &arg3                     \
         )) return NULL;                                   \
-        simd_data r = {.RET = npyv_##NAME(                \
-            req_args[0].data.IN0,                         \
-            req_args[1].data.IN1,                         \
-            req_args[2].data.IN2                          \
+        simd_data data = {.RET = npyv_##NAME(             \
+            arg1.data.IN0, arg2.data.IN1,                 \
+            arg3.data.IN2                                 \
         )};                                               \
-        simd_args_sequence_free(req_args, 3);             \
-        req_args[0].data = r;                             \
-        req_args[0].dtype = simd_data_##RET;              \
-        return simd_arg_to_obj(req_args);                 \
+        simd_arg_free(&arg1);                             \
+        simd_arg_free(&arg2);                             \
+        simd_arg_free(&arg3);                             \
+        simd_arg ret = {                                  \
+            .data = data, .dtype = simd_data_##RET        \
+        };                                                \
+        return simd_arg_to_obj(&ret);                     \
     }
 /**
  * Helper macros for repeating and expand a certain macro.
diff --git a/numpy/core/src/_simd/_simd_inc.h.src b/numpy/core/src/_simd/_simd_inc.h.src
index 530c7a736..9858fc0dc 100644
--- a/numpy/core/src/_simd/_simd_inc.h.src
+++ b/numpy/core/src/_simd/_simd_inc.h.src
@@ -1,6 +1,6 @@
-/**
- * This header works only through '_simd.dispatch.c'
- */
+#ifndef _SIMD_SIMD_INC_H_
+#define _SIMD_SIMD_INC_H_
+
 #include <Python.h>
 #include "simd/simd.h"
 
@@ -147,29 +147,47 @@ typedef struct
 {
     PyObject_HEAD
     // vector type id
-    simd_data_type type;
+    simd_data_type dtype;
     // vector data, aligned for safe casting
     npyv_lanetype_u8 NPY_DECL_ALIGNED(NPY_SIMD_WIDTH) data[NPY_SIMD_WIDTH];
-} simd_vector;
+} PySIMDVectorObject;
 /**
- * convert simd_data to PyObject(simd_vector),
- * raise Python exception on failure and returns NULL.
+ * Create a Python obj(PySIMDVectorObject) from a NPYV vector based on the contents
+ * of `data`(simd_data) and according to the vector data type `dtype`
+ * on range(simd_data_[vu8:vf64]).
+ * Return NULL and a Python exception on failure, otherwise new reference.
+ *
+ * Example:
+ ** simd_data data = {.vu8 = npyv_setall_u8(0xff)};
+ ** PySIMDVectorObject *obj = PySIMDVector_FromData(data, simd_data_vu8);
+ ** if (obj != NULL) {
+ **    printf("I have a valid vector obj and first element is %d\n", obj->data[0]);
+ **    Py_DECREF(obj);
+ ** }
  */
-static simd_vector *
-simd_vector_to_obj(simd_data data, simd_data_type vtype);
+static PySIMDVectorObject *
+PySIMDVector_FromData(simd_data data, simd_data_type dtype);
 /**
- * convert PyObject(simd_vector) to simd_data,
- * raise Python exception on failure.
+ * Return a NPYV vector(simd_data) representation of `obj`(PySIMDVectorObject) and
+ * according to the vector data type `dtype` on range (simd_data_[vu8:vf64]).
+ * Raise a Python exception on failure.
+ *
+ * Example:
+ ** simd_data data = PySIMDVector_AsData(vec_obj, simd_data_vf32);
+ ** if (!PyErr_Occurred()) {
+ **    npyv_f32 add_1 = npyv_add_f32(data.vf32, npyv_setall_f32(1));
+ **    ...
+ ** }
  */
 static simd_data
-simd_vector_from_obj(simd_vector *vec, simd_data_type vtype);
+PySIMDVector_AsData(PySIMDVectorObject *obj, simd_data_type dtype);
 /**
- * initialize and register vector type(PyTypeObject) to PyModule,
- * vector type can be reached through attribute 'vector_type'.
+ * initialize and register PySIMDVectorType to certain PyModule,
+ * PySIMDVectorType can be reached through attribute 'vector_type'.
  * return -1 on error, 0 on success.
  */
 static int
-simd_vector_register(PyObject *module);
+PySIMDVectorType_Init(PyObject *module);
 
 /************************************
  ** Declarations (inc_convert)
@@ -180,13 +198,13 @@ simd_vector_register(PyObject *module);
  * Raise a Python exception on failure.
  *
  * Example:
- ** simd_data data = simd_scalar_from_obj(obj, simd_data_f32);
+ ** simd_data data = simd_scalar_from_number(obj, simd_data_f32);
  ** if (!PyErr_Occurred()) {
  **    printf("I have a valid float %d\n", data.f32);
  ** }
  */
 static simd_data
-simd_scalar_from_obj(PyObject *obj, simd_data_type dtype);
+simd_scalar_from_number(PyObject *obj, simd_data_type dtype);
 /**
  * Create a Python scalar from a C scalar based on the contents
  * of `data`(simd_data) and according to the scalar data type `dtype`
@@ -195,14 +213,14 @@ simd_scalar_from_obj(PyObject *obj, simd_data_type dtype);
  *
  * Example:
  ** simd_data data = {.u32 = 0x7fffffff};
- ** PyObject *obj = simd_scalar_to_obj(data, simd_data_s32);
+ ** PyObject *obj = simd_scalar_to_number(data, simd_data_s32);
  ** if (obj != NULL) {
  **    printf("I have a valid Python integer %d\n", PyLong_AsLong(obj));
  **    Py_DECREF(obj);
  ** }
  */
 static PyObject *
-simd_scalar_to_obj(simd_data data, simd_data_type dtype);
+simd_scalar_to_number(simd_data data, simd_data_type dtype);
 /**
  * Allocate a C array in memory according to number of elements `len`
  * and sequence data type `dtype` on range(simd_data_[qu8:qf64]).
@@ -223,13 +241,13 @@ static void *
 simd_sequence_new(Py_ssize_t len, simd_data_type dtype);
 /**
  * Return the number of elements of the allocated C array `ptr`
- * by `simd_sequence_new()` or `simd_sequence_from_obj()`.
+ * by `simd_sequence_new()` or `simd_sequence_from_iterable()`.
  */
-static size_t
+static Py_ssize_t
 simd_sequence_len(const void *ptr);
 /**
  * Free the allocated C array by `simd_sequence_new()` or
- * `simd_sequence_from_obj()`.
+ * `simd_sequence_from_iterable()`.
  */
 static void
 simd_sequence_free(void *ptr);
@@ -243,7 +261,7 @@ simd_sequence_free(void *ptr);
  * with a Python exception on failure.
  *
  * Example:
- ** npyv_lanetype_u32 *ptr = simd_sequence_from_obj(seq_obj, simd_data_qu32, npyv_nlanes_u32);
+ ** npyv_lanetype_u32 *ptr = simd_sequence_from_iterable(seq_obj, simd_data_qu32, npyv_nlanes_u32);
  ** if (ptr != NULL) {
  **     npyv_u32 a = npyv_load_u32(ptr);
  **     ...
@@ -252,25 +270,25 @@ simd_sequence_free(void *ptr);
  **
  */
 static void *
-simd_sequence_from_obj(PyObject *obj, simd_data_type dtype, unsigned min_size);
+simd_sequence_from_iterable(PyObject *obj, simd_data_type dtype, Py_ssize_t min_size);
 /**
  * Fill a Python sequence object `obj` with a C array `ptr` allocated by
- * `simd_sequence_new()` or `simd_sequence_from_obj()` according to
+ * `simd_sequence_new()` or `simd_sequence_from_iterable()` according
  * to the sequence data type `dtype` on range (simd_data_[qu8:qf64]).
  *
  * Return 0 on success or -1 with a Python exception on failure.
  */
 static int
-simd_sequence_fill_obj(PyObject *obj, const void *ptr, simd_data_type dtype);
+simd_sequence_fill_iterable(PyObject *obj, const void *ptr, simd_data_type dtype);
 /**
  * Create a Python list from a C array `ptr` allocated by
- * `simd_sequence_new()` or `simd_sequence_from_obj()` according to
+ * `simd_sequence_new()` or `simd_sequence_from_iterable()` according
  * to the sequence data type `dtype` on range (simd_data_[qu8:qf64]).
  *
  * Return NULL and a Python exception on failure, otherwise new reference.
  */
 static PyObject *
-simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
+simd_sequence_to_list(const void *ptr, simd_data_type dtype);
 /**
  * Return a SIMD multi-vector(simd_data) representation of Python tuple of
  * (simd_vector*,) `obj` according to the scalar data type `dtype`
@@ -279,7 +297,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
  * Raise a Python exception on failure.
  *
  * Example:
- ** simd_data data = simd_vectorx_from_obj(tuple_obj, simd_data_vf32x2);
+ ** simd_data data = simd_vectorx_from_tuple(tuple_obj, simd_data_vf32x2);
  ** if (!PyErr_Occurred()) {
  **     npyv_f32 sum = npyv_add_f32(data.vf32x2.val[0], data.vf32x2.val[1]);
  **     ...
@@ -287,7 +305,7 @@ simd_sequence_to_obj(const void *ptr, simd_data_type dtype);
  **
  */
 static simd_data
-simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype);
+simd_vectorx_from_tuple(PyObject *obj, simd_data_type dtype);
 /**
  * Create a Python tuple of 'simd_vector' from a SIMD multi-vector
  * based on the contents of `data`(simd_data) and according to
@@ -297,7 +315,7 @@ simd_vectorx_from_obj(PyObject *obj, simd_data_type dtype);
  * Return NULL and a Python exception on failure, otherwise new reference.
  */
 static PyObject *
-simd_vectorx_to_obj(simd_data data, simd_data_type dtype);
+simd_vectorx_to_tuple(simd_data data, simd_data_type dtype);
 
 /************************************
  ** Declarations (inc_arg)
@@ -320,7 +338,7 @@ typedef struct
  * Return -1 and raise Python exception on failure, otherwise return 0.
  *
  * Notes:
- *  - requires `simd_args_sequence_free()` or `simd_sequence_free()`
+ *  - requires `simd_arg_free()` or `simd_sequence_free()`
  *    to free allocated C array, in case of sequence data types.
  *  - the number of minimum acceptable elements for sequence data
  *    types is the number of lanes of the equivalent vector data type.
@@ -333,7 +351,7 @@ typedef struct
  ** }
  ** npyv_u8 v_u8 = npyv_load_u8(arg->data.qu8);
  ** ...
- ** simd_args_sequence_free(&arg, 1);
+ ** simd_arg_free(&arg);
  *
  * Example #2:
  ** simd_arg arg = {.dtype = simd_data_vf32};
@@ -369,36 +387,35 @@ simd_arg_to_obj(const simd_arg *arg);
  * used with PyArg_Parse*().
  *
  * Notes:
- *  - requires `simd_args_sequence_free()` or `simd_sequence_free()`
+ *  - requires `simd_arg_free()` or `simd_sequence_free()`
  *    to free allocated C array, in case of sequence data types.
  *  - the number of minimum acceptable elements for sequence data
  *    types is the number of lanes of the equivalent vector data type.
  *  - use 'arg->obj' to retrieve the parameter obj.
  *
  * Example:
- **  simd_arg req_args[] = {
- **     {.dtype = simd_data_qf32},
- **     {.dtype = simd_data_vf32},
- **  };
+ **  simd_arg seq_f32 = {.dtype = simd_data_qf32};
+ **  simd_arg vec_f32 = {.dtype = simd_data_vf32};
  **  if (!PyArg_ParseTuple(
  **     args, "O&O&:add_sum_f32",
- **     simd_arg_converter, &req_args[0],
- **     simd_arg_converter, &req_args[1]
+ **     simd_arg_converter, &seq_f32,
+ **     simd_arg_converter, &vec_f32
  **  )) {
  **     // fail
  **     return;
  **  }
- **  npyv_f32 load_a = npyv_load_f32(req_args[0].data.qf32);
- **  npyv_f32 sum = npyv_add_f32(load_a, req_args[1].data.vf32);
+ **  npyv_f32 load_a = npyv_load_f32(seq_f32.data.qf32);
+ **  npyv_f32 sum = npyv_add_f32(load_a, vec_f32.data.vf32);
  **  ...
- **  simd_args_sequence_free(req_args, 2);
+ **  simd_arg_free(&seq_f32);
  */
 static int
 simd_arg_converter(PyObject *obj, simd_arg *arg);
 /**
- * Free the allocated C array for sequence data types.
+ * Free the allocated C array if the arg holds a sequence data type.
  */
 static void
-simd_args_sequence_free(simd_arg *args, int args_len);
+simd_arg_free(simd_arg *arg);
 
 #endif // NPY_SIMD
+#endif // _SIMD_SIMD_INC_H_
diff --git a/numpy/core/src/_simd/_simd_inc_vector.h b/numpy/core/src/_simd/_simd_vector.inc
index b0fa17b9a..2a1378f22 100644
--- a/numpy/core/src/_simd/_simd_inc_vector.h
+++ b/numpy/core/src/_simd/_simd_vector.inc
@@ -1,63 +1,54 @@
-#if !NPY_SIMD
-    #error "Not a standalone header, only works through 'simd.dispatch.c.src'"
-#endif
-
+/**
+ * This file is included by `_simd.dispatch.c.src`. Its contents are affected by the simd configuration, and
+ * therefore must be built multiple times. Making it a standalone `.c` file with `NPY_VISIBILITY_HIDDEN`
+ * symbols would require judicious use of `NPY_CPU_DISPATCH_DECLARE` and `NPY_CPU_DISPATCH_CURFX`, which was
+ * deemed too harmful to readability.
+ */
 /************************************
  ** Private Definitions
  ************************************/
-// PySequenceMethods
 static Py_ssize_t
-simd__vector_length(simd_vector *self)
+simd__vector_length(PySIMDVectorObject *self)
 {
-    return simd_data_getinfo(self->type)->nlanes;
+    return simd_data_getinfo(self->dtype)->nlanes;
 }
 static PyObject *
-simd__vector_item(simd_vector *self, Py_ssize_t i)
+simd__vector_item(PySIMDVectorObject *self, Py_ssize_t i)
 {
-    const simd_data_info *info = simd_data_getinfo(self->type);
+    const simd_data_info *info = simd_data_getinfo(self->dtype);
     int nlanes = info->nlanes;
     if (i >= nlanes) {
-        PyErr_SetString(PyExc_IndexError, "list index out of range");
+        PyErr_SetString(PyExc_IndexError, "vector index out of range");
         return NULL;
     }
     npyv_lanetype_u8 *src = self->data + i * info->lane_size;
     simd_data data;
     memcpy(&data.u64, src, info->lane_size);
-    return simd_scalar_to_obj(data, info->to_scalar);
+    return simd_scalar_to_number(data, info->to_scalar);
 }
 
 static PySequenceMethods simd__vector_as_sequence = {
-    (lenfunc) simd__vector_length,           /* sq_length */
-    (binaryfunc) NULL,                       /* sq_concat */
-    (ssizeargfunc) NULL,                     /* sq_repeat */
-    (ssizeargfunc) simd__vector_item,        /* sq_item */
-    (ssizessizeargfunc) NULL,                /* sq_slice */
-    (ssizeobjargproc) NULL,                  /* sq_ass_item */
-    (ssizessizeobjargproc) NULL,             /* sq_ass_slice */
-    (objobjproc) NULL,                       /* sq_contains */
-    (binaryfunc) NULL,                       /* sq_inplace_concat */
-    (ssizeargfunc) NULL,                     /* sq_inplace_repeat */
+    .sq_length = (lenfunc) simd__vector_length,
+    .sq_item = (ssizeargfunc) simd__vector_item
 };
 
-// PyGetSetDef
 static PyObject *
-simd__vector_name(simd_vector *self)
+simd__vector_name(PySIMDVectorObject *self)
 {
-    return PyUnicode_FromString(simd_data_getinfo(self->type)->pyname);
+    return PyUnicode_FromString(simd_data_getinfo(self->dtype)->pyname);
 }
 static PyGetSetDef simd__vector_getset[] = {
     { "__name__", (getter)simd__vector_name, NULL, NULL, NULL },
     { NULL, NULL, NULL, NULL, NULL }
 };
 
-// PyTypeObject(simd__vector_type)
 static PyObject *
-simd__vector_repr(PyObject *self)
+simd__vector_repr(PySIMDVectorObject *self)
 {
-    // PySequence_Fast returns Tuple in PyPy
-    PyObject *obj = PySequence_List(self);
+    PyObject *obj = PySequence_List((PyObject*)self);
     if (obj != NULL) {
-        PyObject *repr = PyObject_Str(obj);
+        const char *type_name = simd_data_getinfo(self->dtype)->pyname;
+        PyObject *repr = PyUnicode_FromFormat("<%s of %R>", type_name, obj);
         Py_DECREF(obj);
         return repr;
     }
@@ -81,10 +72,10 @@ simd__vector_compare(PyObject *self, PyObject *other, int cmp_op)
     }
     return obj;
 }
-static PyTypeObject simd__vector_type = {
+static PyTypeObject PySIMDVectorType = {
     PyVarObject_HEAD_INIT(NULL, 0)
     .tp_name = NPY_TOSTRING(NPY_CPU_DISPATCH_CURFX(VECTOR)),
-    .tp_basicsize = sizeof(simd_vector),
+    .tp_basicsize = sizeof(PySIMDVectorObject),
     .tp_repr = (reprfunc)simd__vector_repr,
     .tp_as_sequence = &simd__vector_as_sequence,
     .tp_flags = Py_TPFLAGS_DEFAULT,
@@ -95,21 +86,21 @@ static PyTypeObject simd__vector_type = {
 /************************************
  ** Protected Definitions
  ************************************/
-static simd_vector *
-simd_vector_to_obj(simd_data data, simd_data_type vtype)
+static PySIMDVectorObject *
+PySIMDVector_FromData(simd_data data, simd_data_type dtype)
 {
-    const simd_data_info *info = simd_data_getinfo(vtype);
+    const simd_data_info *info = simd_data_getinfo(dtype);
     assert(info->is_vector && info->nlanes > 0);
 
-    simd_vector *vec = PyObject_New(simd_vector, &simd__vector_type);
+    PySIMDVectorObject *vec = PyObject_New(PySIMDVectorObject, &PySIMDVectorType);
     if (vec == NULL) {
-        return (simd_vector*)PyErr_NoMemory();
+        return (PySIMDVectorObject*)PyErr_NoMemory();
     }
-    vec->type = vtype;
+    vec->dtype = dtype;
     if (info->is_bool) {
         // boolean vectors are internally treated as unsigned
         // vectors to add compatibility among all SIMD extensions
-        switch(vtype) {
+        switch(dtype) {
         case simd_data_vb8:
             data.vu8 = npyv_cvt_u8_b8(data.vb8);
             break;
@@ -128,24 +119,24 @@ simd_vector_to_obj(simd_data data, simd_data_type vtype)
 }
 
 static simd_data
-simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
+PySIMDVector_AsData(PySIMDVectorObject *vec, simd_data_type dtype)
 {
-    const simd_data_info *info = simd_data_getinfo(vtype);
+    const simd_data_info *info = simd_data_getinfo(dtype);
     assert(info->is_vector && info->nlanes > 0);
 
     simd_data data = {.u64 = 0};
     if (!PyObject_IsInstance(
-        (PyObject *)vec, (PyObject *)&simd__vector_type
+        (PyObject *)vec, (PyObject *)&PySIMDVectorType
     )) {
         PyErr_Format(PyExc_TypeError,
             "a vector type %s is required", info->pyname
         );
         return data;
     }
-    if (vec->type != vtype) {
+    if (vec->dtype != dtype) {
         PyErr_Format(PyExc_TypeError,
             "a vector type %s is required, got(%s)",
-            info->pyname, simd_data_getinfo(vec->type)->pyname
+            info->pyname, simd_data_getinfo(vec->dtype)->pyname
         );
         return data;
     }
@@ -154,7 +145,7 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
     if (info->is_bool) {
         // boolean vectors are internally treated as unsigned
         // vectors to add compatibility among all SIMD extensions
-        switch(vtype) {
+        switch(dtype) {
         case simd_data_vb8:
             data.vb8 = npyv_cvt_b8_u8(data.vu8);
             break;
@@ -172,14 +163,14 @@ simd_vector_from_obj(simd_vector *vec, simd_data_type vtype)
 }
 
 static int
-simd_vector_register(PyObject *module)
+PySIMDVectorType_Init(PyObject *module)
 {
-    Py_INCREF(&simd__vector_type);
-    if (PyType_Ready(&simd__vector_type)) {
+    Py_INCREF(&PySIMDVectorType);
+    if (PyType_Ready(&PySIMDVectorType)) {
         return -1;
     }
     if (PyModule_AddObject(
-        module, "vector_type",(PyObject *)&simd__vector_type
+        module, "vector_type",(PyObject *)&PySIMDVectorType
     )) {
         return -1;
     }
diff --git a/numpy/core/tests/test_simd.py b/numpy/core/tests/test_simd.py
index 3ca6b068d..77a636491 100644
--- a/numpy/core/tests/test_simd.py
+++ b/numpy/core/tests/test_simd.py
@@ -3,7 +3,7 @@
 import pytest
 from numpy.core._simd import targets
 
-class _Test_Utility(object):
+class _Test_Utility:
     # submodule of the desired SIMD extention, e.g. targets["AVX512F"]
     npyv = None
     # the current data type suffix e.g. 's8'
@@ -14,10 +14,7 @@ class _Test_Utility(object):
         To call NPV intrinsics without the prefix 'npyv_' and
         auto suffixing intrinsics according to class attribute 'sfx'
         """
-        nattr = getattr(self.npyv, attr + "_" + self.sfx)
-        if callable(nattr):
-            return lambda *args: nattr(*args)
-        return nattr
+        return getattr(self.npyv, attr + "_" + self.sfx)
 
     def _data(self, n=None, reverse=False):
         """
@@ -87,7 +84,7 @@ class _SIMD_INT(_Test_Utility):
     """
     def test_operators_shift(self):
         if self.sfx in ("u8", "s8"):
-            pytest.skip("there are no shift intrinsics for npyv_" + self.sfx)
+            return
 
         data_a = self._data(self._int_max() - self.nlanes)
         data_b = self._data(self._int_min(), reverse=True)
@@ -113,7 +110,7 @@ class _SIMD_INT(_Test_Utility):
 
     def test_arithmetic_subadd_saturated(self):
         if self.sfx in ("u32", "s32", "u64", "s64"):
-            pytest.skip("there are no saturated add/sub intrinsics for npyv_" + self.sfx)
+            return
 
         data_a = self._data(self._int_max() - self.nlanes)
         data_b = self._data(self._int_min(), reverse=True)
@@ -362,7 +359,7 @@ class _SIMD_ALL(_Test_Utility):
 
     def test_arithmetic_mul(self):
         if self.sfx in ("u64", "s64"):
-            pytest.skip("there is no multiplication intrinsic for npyv_" + self.sfx)
+            return
 
         if self._is_fp():
             data_a = self._data()
@@ -377,7 +374,7 @@ class _SIMD_ALL(_Test_Utility):
 
     def test_arithmetic_div(self):
         if not self._is_fp():
-            pytest.skip("there is no division intrinsic for npyv_" + self.sfx)
+            return
 
         data_a, data_b = self._data(), self._data(reverse=True)
         vdata_a, vdata_b = self.load(data_a), self.load(data_b)
@@ -387,17 +384,18 @@ class _SIMD_ALL(_Test_Utility):
         div = self.div(vdata_a, vdata_b)
         assert div == data_div
 
+
 int_sfx = ("u8", "s8", "u16", "s16", "u32", "s32", "u64", "s64")
 fp_sfx  = ("f32", "f64")
 all_sfx = int_sfx + fp_sfx
 tests_registery = {
-    int_sfx : "_SIMD_INT",
-    fp_sfx  : "_SIMD_FP",
-    all_sfx : "_SIMD_ALL"
+    int_sfx : _SIMD_INT,
+    fp_sfx  : _SIMD_FP,
+    all_sfx : _SIMD_ALL
 }
-for name, npyv in targets.items():
+for target_name, npyv in targets.items():
     simd_width = npyv.simd if npyv else ''
-    pretty_name = name.split('__') # multi-target separator
+    pretty_name = target_name.split('__') # multi-target separator
     if len(pretty_name) > 1:
         # multi-target
         pretty_name = f"({' '.join(pretty_name)})"
@@ -413,14 +411,12 @@ for name, npyv in targets.items():
     elif not npyv.simd_f64:
         skip_sfx["f64"] = f"target '{pretty_name}' doesn't support double-precision"
 
-    for sfxes, class_name in tests_registery.items():
+    for sfxes, cls in tests_registery.items():
         for sfx in sfxes:
             skip_m = skip_sfx.get(sfx, skip)
+            inhr = (cls,)
+            attr = dict(npyv=targets[target_name], sfx=sfx)
+            tcls = type(f"Test{cls.__name__}_{simd_width}_{target_name}_{sfx}", inhr, attr)
             if skip_m:
-                skip_m = '@pytest.mark.skip(reason="%s")' % skip_m
-            exec(
-                f"{skip_m}\n"
-                f"class Test{class_name}_{simd_width}_{name}_{sfx}({class_name}):\n"
-                f"   npyv = targets['{name}']\n"
-                f"   sfx  = '{sfx}'\n"
-            )
+                pytest.mark.skip(reason=skip_m)(tcls)
+            globals()[tcls.__name__] = tcls
diff --git a/numpy/core/tests/test_simd_module.py b/numpy/core/tests/test_simd_module.py
index 5bf82c0aa..3d710884a 100644
--- a/numpy/core/tests/test_simd_module.py
+++ b/numpy/core/tests/test_simd_module.py
@@ -1,16 +1,14 @@
 import pytest
 from numpy.core._simd import targets
-
-npyv = None
-npyv2 = None
-for target_name, npyv_mod in targets.items():
-    if npyv:
-        if npyv_mod and npyv_mod.simd:
-            npyv2 = npyv_mod
-            break
-        continue
-    if npyv_mod and npyv_mod.simd:
-        npyv = npyv_mod
+"""
+This testing unit only checks the sanity of common functionality,
+therefore all we need is to take one submodule that represents any
+of the enabled SIMD extensions to run the tests on it, and a second submodule
+that is required to run only one check related to the possibility of mixing
+the data types among the submodules.
+"""
+npyvs = [npyv_mod for npyv_mod in targets.values() if npyv_mod and npyv_mod.simd]
+npyv, npyv2 = (npyvs + [None, None])[:2]
 
 unsigned_sfx = ["u8", "u16", "u32", "u64"]
 signed_sfx = ["s8", "s16", "s32", "s64"]
@@ -22,73 +20,68 @@ int_sfx = unsigned_sfx + signed_sfx
 all_sfx = unsigned_sfx + int_sfx
 
 @pytest.mark.skipif(not npyv, reason="could not find any SIMD extension with NPYV support")
-class Test_SIMD_MODULE(object):
-    def test_num_lanes(self):
-        for sfx in all_sfx:
-            nlanes = getattr(npyv, "nlanes_" + sfx)
-            vector = getattr(npyv, "setall_" + sfx)(1)
-            assert len(vector) == nlanes
+class Test_SIMD_MODULE:
 
-    def test_type_name(self):
-        for sfx in all_sfx:
-            vector = getattr(npyv, "setall_" + sfx)(1)
-            assert vector.__name__ == "npyv_" + sfx
+    @pytest.mark.parametrize('sfx', all_sfx)
+    def test_num_lanes(self, sfx):
+        nlanes = getattr(npyv, "nlanes_" + sfx)
+        vector = getattr(npyv, "setall_" + sfx)(1)
+        assert len(vector) == nlanes
 
-    def test_raises(self):
-        def assert_raises(e, callback, *args):
-            __tracebackhide__ = True  # Hide traceback for py.test
-            try:
-                callback(*args)
-                raise AssertionError("expected to raise " + e.__name__)
-            except e:
-                pass
+    @pytest.mark.parametrize('sfx', all_sfx)
+    def test_type_name(self, sfx):
+        vector = getattr(npyv, "setall_" + sfx)(1)
+        assert vector.__name__ == "npyv_" + sfx
 
+    def test_raises(self):
         a, b = [npyv.setall_u32(1)]*2
         for sfx in all_sfx:
             vcb = lambda intrin: getattr(npyv, f"{intrin}_{sfx}")
-            assert_raises(TypeError, vcb("add"), a)
-            assert_raises(TypeError, vcb("add"), a, b, a)
-            assert_raises(TypeError, vcb("setall"))
-            assert_raises(TypeError, vcb("setall"), [1])
-            assert_raises(TypeError, vcb("load"), 1)
-            assert_raises(ValueError, vcb("load"), [1])
-            assert_raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a))
+            pytest.raises(TypeError, vcb("add"), a)
+            pytest.raises(TypeError, vcb("add"), a, b, a)
+            pytest.raises(TypeError, vcb("setall"))
+            pytest.raises(TypeError, vcb("setall"), [1])
+            pytest.raises(TypeError, vcb("load"), 1)
+            pytest.raises(ValueError, vcb("load"), [1])
+            pytest.raises(ValueError, vcb("store"), [1], getattr(npyv, f"reinterpret_{sfx}_u32")(a))
 
+    @pytest.mark.skipif(not npyv2, reason=(
+        "could not find a second SIMD extension with NPYV support"
+    ))
+    def test_nomix(self):
         # mix among submodules isn't allowed
-        if not npyv2:
-            return
+        a = npyv.setall_u32(1)
         a2 = npyv2.setall_u32(1)
-        assert_raises(TypeError, npyv.add_u32, a2, a2)
-        assert_raises(TypeError, npyv2.add_u32, a, a)
+        pytest.raises(TypeError, npyv.add_u32, a2, a2)
+        pytest.raises(TypeError, npyv2.add_u32, a, a)
 
-    def test_unsigned_overflow(self):
-        for sfx in unsigned_sfx:
-            nlanes = getattr(npyv, "nlanes_" + sfx)
-            hfbyte_len = int(sfx[1:])//4
-            maxu = int(f"0x{'f'*hfbyte_len}", 16)
-            maxu_72 = 0xfffffffffffffffff
-            lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0]
-            assert lane == maxu
-            lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes)
-            assert lanes == [maxu] * nlanes
-            lane = getattr(npyv, "setall_" + sfx)(-1)[0]
-            assert lane == maxu
-            lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes)
-            assert lanes == [maxu] * nlanes
+    @pytest.mark.parametrize('sfx', unsigned_sfx)
+    def test_unsigned_overflow(self, sfx):
+        nlanes = getattr(npyv, "nlanes_" + sfx)
+        maxu = (1 << int(sfx[1:])) - 1
+        maxu_72 = (1 << 72) - 1
+        lane = getattr(npyv, "setall_" + sfx)(maxu_72)[0]
+        assert lane == maxu
+        lanes = getattr(npyv, "load_" + sfx)([maxu_72] * nlanes)
+        assert lanes == [maxu] * nlanes
+        lane = getattr(npyv, "setall_" + sfx)(-1)[0]
+        assert lane == maxu
+        lanes = getattr(npyv, "load_" + sfx)([-1] * nlanes)
+        assert lanes == [maxu] * nlanes
 
-    def test_signed_overflow(self):
-        for sfx in signed_sfx:
-            nlanes = getattr(npyv, "nlanes_" + sfx)
-            maxs_72 = 0x7fffffffffffffffff
-            lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0]
-            assert lane == -1
-            lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes)
-            assert lanes == [-1] * nlanes
-            mins_72 = -0x80000000000000000
-            lane = getattr(npyv, "setall_" + sfx)(mins_72)[0]
-            assert lane == 0
-            lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes)
-            assert lanes == [0] * nlanes
+    @pytest.mark.parametrize('sfx', signed_sfx)
+    def test_signed_overflow(self, sfx):
+        nlanes = getattr(npyv, "nlanes_" + sfx)
+        maxs_72 = (1 << 71) - 1
+        lane = getattr(npyv, "setall_" + sfx)(maxs_72)[0]
+        assert lane == -1
+        lanes = getattr(npyv, "load_" + sfx)([maxs_72] * nlanes)
+        assert lanes == [-1] * nlanes
+        mins_72 = -1 << 71
+        lane = getattr(npyv, "setall_" + sfx)(mins_72)[0]
+        assert lane == 0
+        lanes = getattr(npyv, "load_" + sfx)([mins_72] * nlanes)
+        assert lanes == [0] * nlanes
 
     def test_truncate_f32(self):
         f32 = npyv.setall_f32(0.1)[0]
diff --git a/numpy/distutils/command/build.py b/numpy/distutils/command/build.py
index 6025586cd..a4fda537d 100644
--- a/numpy/distutils/command/build.py
+++ b/numpy/distutils/command/build.py
@@ -47,7 +47,7 @@ class build(old_build):
             - not part of dispatch-able features(--cpu-dispatch)
             - not supported by compiler or platform
         """
-        self.simd_test = "BASELINE SSE2 SSE41 SSE42 XOP (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
+        self.simd_test = "BASELINE SSE2 SSE42 XOP FMA4 (FMA3 AVX2) AVX512F AVX512_SKX VSX VSX2 VSX3 NEON ASIMD"
 
     def finalize_options(self):
         build_scripts = self.build_scripts
author	Sayed Adel <seiko@imavr.com>	2020-09-15 16:06:14 +0200
committer	Sayed Adel <seiko@imavr.com>	2020-10-27 11:46:58 +0000
commit	7d125fb70cb149207171b7181312f9679dd4d451 (patch)
tree	8c6cbd3b0dd140160d96b98794f3284857681c82 /numpy
parent	e19f7a8cfe59226fed6cc7a7da9e881218364d49 (diff)
download	numpy-7d125fb70cb149207171b7181312f9679dd4d451.tar.gz