Merge pull request #23358 from seberg/dtype-class-in-types

API: Add DType classes into new `numpy.dtypes` module
author: Matti Picus <matti.picus@gmail.com> 2023-04-27 14:11:43 +0300
committer: GitHub <noreply@github.com> 2023-04-27 14:11:43 +0300
commit: 6d34e34e7bc0dad1722958f00f3767d707c1849f (patch)
tree: 782c8250bc6af42a1a514a958c3f1488abdb8ca2 /numpy/core/src/multiarray
parent: e2db7ca90b42d4cdc2c61a0ebc116879c35b7825 (diff)
parent: 0dc83e270a7891ad27e38bb94d8567deaff5df51 (diff)
download: numpy-6d34e34e7bc0dad1722958f00f3767d707c1849f.tar.gz
6 files changed, 206 insertions, 153 deletions
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 92ddd1ad0..b84625c77 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -4646,12 +4646,30 @@ set_typeinfo(PyObject *dict)
      * should be defined on the class and inherited to the scalar.
      * (NPY_HALF is the largest builtin one.)
      */
-    for (i = 0; i <= NPY_HALF; i++) {
-        if (dtypemeta_wrap_legacy_descriptor(_builtin_descrs[i]) < 0) {
-            return -1;
-        }
+    /**begin repeat
+     *
+     * #NAME = BOOL,
+     *         BYTE, UBYTE, SHORT, USHORT, INT, UINT,
+     *         LONG, ULONG, LONGLONG, ULONGLONG,
+     *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+     *         CFLOAT, CDOUBLE, CLONGDOUBLE,
+     *         OBJECT, STRING, UNICODE, VOID,
+     *         DATETIME, TIMEDELTA#
+     */
+    if (dtypemeta_wrap_legacy_descriptor(
+            _builtin_descrs[NPY_@NAME@],
+            "numpy.dtypes." NPY_@NAME@_Name "DType",
+#ifdef NPY_@NAME@_alias
+            "numpy.dtypes." NPY_@NAME@_Alias "DType"
+#else
+            NULL
+#endif
+            ) < 0) {
+        return -1;
     }
 
+    /**end repeat**/
+
     /*
      * Add cast functions for the new types
      */
diff --git a/numpy/core/src/multiarray/arraytypes.h.src b/numpy/core/src/multiarray/arraytypes.h.src
index aad464ccf..90a075dad 100644
--- a/numpy/core/src/multiarray/arraytypes.h.src
+++ b/numpy/core/src/multiarray/arraytypes.h.src
@@ -66,4 +66,102 @@ NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT int @TYPE@_@func@,
 NPY_CPU_DISPATCH_DECLARE(NPY_NO_EXPORT int BOOL_argmax,
     (npy_bool *ip, npy_intp n, npy_intp *max_ind, PyArrayObject *aip))
 
+
+/*
+ * Define DType and scalar type names and aliases as used in Python.
+ */
+
+/**begin repeat
+ * #NAME = BOOL,
+ *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *         CFLOAT, CDOUBLE, CLONGDOUBLE,
+ *         STRING, UNICODE, VOID, OBJECT,
+ *         DATETIME, TIMEDELTA#
+ * #name = bool_,
+ *         float16, float32, float64, longdouble,
+ *         complex64, complex128, clongdouble,
+ *         bytes_, str_, void, object_,
+ *         datetime64, timedelta64#
+ * #Name = Bool,
+ *         Float16, Float32, Float64, LongDouble,
+ *         Complex64, Complex128, CLongDouble,
+ *         Bytes, Str, Void, Object,
+ *         DateTime64, TimeDelta64#
+ */
+#define NPY_@NAME@_name "@name@"
+#define NPY_@NAME@_Name "@Name@"
+
+/**end repeat**/
+
+
+/*
+ * Give integers different names when they are the same size (gh-9799).
+ * `intX` always refers to the first int of that size in the sequence
+ * `['LONG', 'LONGLONG', 'INT', 'SHORT', 'BYTE']`.
+ * Unfortunately, since the bitsize names are not strictly fixed, we add
+ * the C name for all integer types (as aliases).
+ *
+ * Right now, we do not define the C aliases for floats (which are always
+ * the same).
+ */
+
+#if NPY_SIZEOF_BYTE == NPY_SIZEOF_SHORT
+    #define BYTE_not_size_named
+#endif
+#if NPY_SIZEOF_SHORT == NPY_SIZEOF_INT
+    #define SHORT_not_size_named
+#endif
+#if NPY_SIZEOF_INT == NPY_SIZEOF_LONG
+    #define INT_not_size_named
+#endif
+#if NPY_SIZEOF_LONGLONG == NPY_SIZEOF_LONG
+    #define LONGLONG_not_size_named
+#endif
+
+
+/**begin repeat
+ * #NAME = BYTE, SHORT, INT, LONG, LONGLONG,
+ *         UBYTE, USHORT, UINT, ULONG, ULONGLONG#
+ * #CNAME = (BYTE, SHORT, INT, LONG, LONGLONG)*2#
+ * #cname = byte, short, intc, int_, longlong,
+ *          ubyte, ushort, uintc, uint, ulonglong#
+ * #CName = Byte, Short, Int, Long, LongLong,
+ *          UByte, UShort, UInt, ULong, ULongLong#
+ * #bitname = int*5, uint*5#
+ * #BitName = Int*5, UInt*5#
+ */
+
+#ifdef @CNAME@_not_size_named
+    #define NPY_@NAME@_name "@cname@"
+    #define NPY_@NAME@_Name "@CName@"
+#else
+    /* The C-name is considered just an alias for these: */
+    #define NPY_@NAME@_alias "@cname@"
+    #define NPY_@NAME@_Alias "@CName@"
+
+    /* The bitsof macro includes math, so cannot be stringified */
+    #if NPY_BITSOF_@CNAME@ == 8
+        #define NPY_@NAME@_name "@bitname@8"
+        #define NPY_@NAME@_Name "@BitName@8"
+    #elif NPY_BITSOF_@CNAME@ == 16
+        #define NPY_@NAME@_name "@bitname@16"
+        #define NPY_@NAME@_Name "@BitName@16"
+    #elif NPY_BITSOF_@CNAME@ == 32
+        #define NPY_@NAME@_name "@bitname@32"
+        #define NPY_@NAME@_Name "@BitName@32"
+    #elif NPY_BITSOF_@CNAME@ == 64
+        #define NPY_@NAME@_name "@bitname@64"
+        #define NPY_@NAME@_Name "@BitName@64"
+    #else
+        #error "need to fix integer bit-length name code"
+    #endif
+#endif
+
+/**end repeat**/
+
+#undef BYTE_not_size_named
+#undef SHORT_not_size_named
+#undef INT_not_size_named
+#undef LONGLONG_not_size_named
+
 #endif  /* NUMPY_CORE_SRC_MULTIARRAY_ARRAYTYPES_H_ */
diff --git a/numpy/core/src/multiarray/dtypemeta.c b/numpy/core/src/multiarray/dtypemeta.c
index f8c1b6617..4990afa18 100644
--- a/numpy/core/src/multiarray/dtypemeta.c
+++ b/numpy/core/src/multiarray/dtypemeta.c
@@ -9,7 +9,9 @@
 #include <numpy/ndarraytypes.h>
 #include <numpy/arrayscalars.h>
 #include "npy_pycompat.h"
+#include "npy_import.h"
 
+#include "arraytypes.h"
 #include "common.h"
 #include "dtypemeta.h"
 #include "descriptor.h"
@@ -726,12 +728,17 @@ object_common_dtype(
  * be a HeapType and its instances should be exact PyArray_Descr structs.
  *
  * @param descr The descriptor that should be wrapped.
- * @param name The name for the DType, if NULL the type character is used.
+ * @param name The name for the DType.
+ * @param alias A second name which is also set to the new class for builtins
+ *              (i.e. `np.types.LongDType` for `np.types.Int64DType`).
+ *              Some may have more aliases, as `intp` is not its own thing,
+ *              as of writing this, these are not added here.
  *
  * @returns 0 on success, -1 on failure.
  */
 NPY_NO_EXPORT int
-dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
+dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr,
+        const char *name, const char *alias)
 {
     int has_type_set = Py_TYPE(descr) == &PyArrayDescr_Type;
 
@@ -758,47 +765,14 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
         return -1;
     }
 
-    /*
-     * Note: we have no intention of freeing the memory again since this
-     * behaves identically to static type definition (see comment above).
-     * This is seems cleaner for the legacy API, in the new API both static
-     * and heap types are possible (some difficulty arises from the fact that
-     * these are instances of DTypeMeta and not type).
-     * In particular our own DTypes can be true static declarations.
-     * However, this function remains necessary for legacy user dtypes.
-     */
-
-    const char *scalar_name = descr->typeobj->tp_name;
-    /*
-     * We have to take only the name, and ignore the module to get
-     * a reasonable __name__, since static types are limited in this regard
-     * (this is not ideal, but not a big issue in practice).
-     * This is what Python does to print __name__ for static types.
-     */
-    const char *dot = strrchr(scalar_name, '.');
-    if (dot) {
-        scalar_name = dot + 1;
-    }
-    Py_ssize_t name_length = strlen(scalar_name) + 14;
-
-    char *tp_name = PyMem_Malloc(name_length);
-    if (tp_name == NULL) {
-        PyErr_NoMemory();
-        return -1;
-    }
-
-    snprintf(tp_name, name_length, "numpy.dtype[%s]", scalar_name);
-
     NPY_DType_Slots *dt_slots = PyMem_Malloc(sizeof(NPY_DType_Slots));
     if (dt_slots == NULL) {
-        PyMem_Free(tp_name);
         return -1;
     }
     memset(dt_slots, '\0', sizeof(NPY_DType_Slots));
 
     PyArray_DTypeMeta *dtype_class = PyMem_Malloc(sizeof(PyArray_DTypeMeta));
     if (dtype_class == NULL) {
-        PyMem_Free(tp_name);
         PyMem_Free(dt_slots);
         return -1;
     }
@@ -820,13 +794,19 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
             .tp_flags = Py_TPFLAGS_DEFAULT,
             .tp_base = &PyArrayDescr_Type,
             .tp_new = (newfunc)legacy_dtype_default_new,
+            .tp_doc = (
+                "DType class corresponding to the scalar type and dtype of "
+                "the same name.\n\n"
+                "Please see `numpy.dtype` for the typical way to create\n"
+                "dtype instances and :ref:`arrays.dtypes` for additional\n"
+                "information."),
         },},
         .flags = NPY_DT_LEGACY,
         /* Further fields are not common between DTypes */
     };
     memcpy(dtype_class, &prototype, sizeof(PyArray_DTypeMeta));
     /* Fix name of the Type*/
-    ((PyTypeObject *)dtype_class)->tp_name = tp_name;
+    ((PyTypeObject *)dtype_class)->tp_name = name;
     dtype_class->dt_slots = dt_slots;
 
     /* Let python finish the initialization (probably unnecessary) */
@@ -921,6 +901,21 @@ dtypemeta_wrap_legacy_descriptor(PyArray_Descr *descr)
     /* Finally, replace the current class of the descr */
     Py_SET_TYPE(descr, (PyTypeObject *)dtype_class);
 
+    /* And it to the types submodule if it is a builtin dtype */
+    if (!PyTypeNum_ISUSERDEF(descr->type_num)) {
+        static PyObject *add_dtype_helper = NULL;
+        npy_cache_import("numpy.dtypes", "_add_dtype_helper", &add_dtype_helper);
+        if (add_dtype_helper == NULL) {
+            return -1;
+        }
+
+        if (PyObject_CallFunction(
+                add_dtype_helper,
+                "Os", (PyObject *)dtype_class, alias) == NULL) {
+            return -1;
+        }
+    }
+
     return 0;
 }
 
@@ -958,7 +953,8 @@ static PyGetSetDef dtypemeta_getset[] = {
 
 static PyMemberDef dtypemeta_members[] = {
     {"type",
-        T_OBJECT, offsetof(PyArray_DTypeMeta, scalar_type), READONLY, NULL},
+        T_OBJECT, offsetof(PyArray_DTypeMeta, scalar_type), READONLY,
+        "scalar type corresponding to the DType."},
     {NULL, 0, 0, 0, NULL},
 };
 
diff --git a/numpy/core/src/multiarray/dtypemeta.h b/numpy/core/src/multiarray/dtypemeta.h
index 6dbfd1549..32212228c 100644
--- a/numpy/core/src/multiarray/dtypemeta.h
+++ b/numpy/core/src/multiarray/dtypemeta.h
@@ -140,7 +140,8 @@ python_builtins_are_known_scalar_types(
         PyArray_DTypeMeta *cls, PyTypeObject *pytype);
 
 NPY_NO_EXPORT int
-dtypemeta_wrap_legacy_descriptor(PyArray_Descr *dtypem);
+dtypemeta_wrap_legacy_descriptor(
+        PyArray_Descr *dtypem, const char *name, const char *alias);
 
 #ifdef __cplusplus
 }
diff --git a/numpy/core/src/multiarray/scalartypes.c.src b/numpy/core/src/multiarray/scalartypes.c.src
index 17163ef09..c721800be 100644
--- a/numpy/core/src/multiarray/scalartypes.c.src
+++ b/numpy/core/src/multiarray/scalartypes.c.src
@@ -15,6 +15,7 @@
 
 #include "npy_pycompat.h"
 
+#include "arraytypes.h"
 #include "npy_config.h"
 #include "mapping.h"
 #include "ctors.h"
@@ -3581,7 +3582,7 @@ object_arrtype_call(PyObjectScalarObject *obj, PyObject *args, PyObject *kwds)
 
 NPY_NO_EXPORT PyTypeObject PyObjectArrType_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "numpy.object_",
+    .tp_name = "numpy." NPY_OBJECT_name,
     .tp_basicsize = sizeof(PyObjectScalarObject),
     .tp_dealloc = (destructor)object_arrtype_dealloc,
     .tp_as_sequence = &object_arrtype_as_sequence,
@@ -3617,60 +3618,29 @@ gen_arrtype_subscript(PyObject *self, PyObject *key)
 }
 
 
-#define NAME_bool "bool"
-#define NAME_void "void"
-#define NAME_string "bytes"
-#define NAME_unicode "str"
-
 /**begin repeat
- * #name = bool, string, unicode, void#
- * #NAME = Bool, String, Unicode, Void#
- * #ex = _,_,_,#
+ * #Name = Bool,
+ *         Byte, Short, Int, Long, LongLong,
+ *         UByte, UShort, UInt, ULong, ULongLong,
+ *         Half, Float, Double, LongDouble,
+ *         CFloat, CDouble, CLongDouble,
+ *         String, Unicode, Void,
+ *         Datetime, Timedelta#
+ * #NAME = BOOL,
+ *         BYTE, SHORT, INT, LONG, LONGLONG,
+ *         UBYTE, USHORT, UINT, ULONG, ULONGLONG,
+ *         HALF, FLOAT, DOUBLE, LONGDOUBLE,
+ *         CFLOAT, CDOUBLE, CLONGDOUBLE,
+ *         STRING, UNICODE, VOID,
+ *         DATETIME, TIMEDELTA#
  */
-NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
-    PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "numpy." NAME_@name@ "@ex@",
-    .tp_basicsize = sizeof(Py@NAME@ScalarObject),
-};
-/**end repeat**/
-
-#undef NAME_bool
-#undef NAME_void
-#undef NAME_string
-#undef NAME_unicode
 
-/**begin repeat
- * #NAME = Byte, Short, Int, Long, LongLong, UByte, UShort, UInt, ULong,
- *         ULongLong, Half, Float, Double, LongDouble, Datetime, Timedelta#
- * #name = int*5, uint*5, float*4, datetime, timedelta#
- * #CNAME = (CHAR, SHORT, INT, LONG, LONGLONG)*2, HALF, FLOAT, DOUBLE,
- *          LONGDOUBLE, DATETIME, TIMEDELTA#
- */
-#if NPY_BITSOF_@CNAME@ == 8
-#define _THIS_SIZE "8"
-#elif NPY_BITSOF_@CNAME@ == 16
-#define _THIS_SIZE "16"
-#elif NPY_BITSOF_@CNAME@ == 32
-#define _THIS_SIZE "32"
-#elif NPY_BITSOF_@CNAME@ == 64
-#define _THIS_SIZE "64"
-#elif NPY_BITSOF_@CNAME@ == 80
-#define _THIS_SIZE "80"
-#elif NPY_BITSOF_@CNAME@ == 96
-#define _THIS_SIZE "96"
-#elif NPY_BITSOF_@CNAME@ == 128
-#define _THIS_SIZE "128"
-#elif NPY_BITSOF_@CNAME@ == 256
-#define _THIS_SIZE "256"
-#endif
-NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
+NPY_NO_EXPORT PyTypeObject Py@Name@ArrType_Type = {
     PyVarObject_HEAD_INIT(NULL, 0)
-    .tp_name = "numpy.@name@" _THIS_SIZE,
-    .tp_basicsize = sizeof(Py@NAME@ScalarObject),
+    .tp_name = "numpy." NPY_@NAME@_name,
+    .tp_basicsize = sizeof(Py@Name@ScalarObject),
 };
 
-
-#undef _THIS_SIZE
 /**end repeat**/
 
 
@@ -3679,37 +3649,6 @@ static PyMappingMethods gentype_as_mapping = {
 };
 
 
-/**begin repeat
- * #NAME = CFloat, CDouble, CLongDouble#
- * #name = complex*3#
- * #CNAME = FLOAT, DOUBLE, LONGDOUBLE#
- */
-#if NPY_BITSOF_@CNAME@ == 16
-#define _THIS_SIZE "32"
-#elif NPY_BITSOF_@CNAME@ == 32
-#define _THIS_SIZE "64"
-#elif NPY_BITSOF_@CNAME@ == 64
-#define _THIS_SIZE "128"
-#elif NPY_BITSOF_@CNAME@ == 80
-#define _THIS_SIZE "160"
-#elif NPY_BITSOF_@CNAME@ == 96
-#define _THIS_SIZE "192"
-#elif NPY_BITSOF_@CNAME@ == 128
-#define _THIS_SIZE "256"
-#elif NPY_BITSOF_@CNAME@ == 256
-#define _THIS_SIZE "512"
-#endif
-
-NPY_NO_EXPORT PyTypeObject Py@NAME@ArrType_Type = {
-    PyVarObject_HEAD_INIT(0, 0)
-    .tp_name = "numpy.@name@" _THIS_SIZE,
-    .tp_basicsize = sizeof(Py@NAME@ScalarObject),
-    .tp_flags = Py_TPFLAGS_DEFAULT,
-};
-#undef _THIS_SIZE
-
-/**end repeat**/
-
 /*
  * This table maps the built-in type numbers to their scalar
  * type numbers.  Note that signed integers are mapped to INTNEG_SCALAR,
@@ -4098,36 +4037,6 @@ initialize_numeric_types(void)
 
     PyArrayIter_Type.tp_iter = PyObject_SelfIter;
     PyArrayMapIter_Type.tp_iter = PyObject_SelfIter;
-
-    /*
-     * Give types different names when they are the same size (gh-9799).
-     * `np.intX` always refers to the first int of that size in the sequence
-     * `['LONG', 'LONGLONG', 'INT', 'SHORT', 'BYTE']`.
-     */
-#if (NPY_SIZEOF_BYTE == NPY_SIZEOF_SHORT)
-    PyByteArrType_Type.tp_name = "numpy.byte";
-    PyUByteArrType_Type.tp_name = "numpy.ubyte";
-#endif
-#if (NPY_SIZEOF_SHORT == NPY_SIZEOF_INT)
-    PyShortArrType_Type.tp_name = "numpy.short";
-    PyUShortArrType_Type.tp_name = "numpy.ushort";
-#endif
-#if (NPY_SIZEOF_INT == NPY_SIZEOF_LONG)
-    PyIntArrType_Type.tp_name = "numpy.intc";
-    PyUIntArrType_Type.tp_name = "numpy.uintc";
-#endif
-#if (NPY_SIZEOF_LONGLONG == NPY_SIZEOF_LONG)
-    PyLongLongArrType_Type.tp_name = "numpy.longlong";
-    PyULongLongArrType_Type.tp_name = "numpy.ulonglong";
-#endif
-
-    /*
-    Do the same for longdouble
-    */
-#if (NPY_SIZEOF_LONGDOUBLE == NPY_SIZEOF_DOUBLE)
-    PyLongDoubleArrType_Type.tp_name = "numpy.longdouble";
-    PyCLongDoubleArrType_Type.tp_name = "numpy.clongdouble";
-#endif
 }
 
 typedef struct {
diff --git a/numpy/core/src/multiarray/usertypes.c b/numpy/core/src/multiarray/usertypes.c
index a172343f1..16ca74ef8 100644
--- a/numpy/core/src/multiarray/usertypes.c
+++ b/numpy/core/src/multiarray/usertypes.c
@@ -261,12 +261,43 @@ PyArray_RegisterDataType(PyArray_Descr *descr)
         return -1;
     }
 
+    /*
+     * Legacy user DTypes classes cannot have a name, since the user never
+     * defined one.  So we create a name for them here. These DTypes are
+     * effectively static types.
+     *
+     * Note: we have no intention of freeing the memory again since this
+     * behaves identically to static type definition.
+     */
+
+    const char *scalar_name = descr->typeobj->tp_name;
+    /*
+     * We have to take only the name, and ignore the module to get
+     * a reasonable __name__, since static types are limited in this regard
+     * (this is not ideal, but not a big issue in practice).
+     * This is what Python does to print __name__ for static types.
+     */
+    const char *dot = strrchr(scalar_name, '.');
+    if (dot) {
+        scalar_name = dot + 1;
+    }
+    Py_ssize_t name_length = strlen(scalar_name) + 14;
+
+    char *name = PyMem_Malloc(name_length);
+    if (name == NULL) {
+        PyErr_NoMemory();
+        return -1;
+    }
+
+    snprintf(name, name_length, "numpy.dtype[%s]", scalar_name);
+
     userdescrs[NPY_NUMUSERTYPES++] = descr;
 
     descr->type_num = typenum;
-    if (dtypemeta_wrap_legacy_descriptor(descr) < 0) {
+    if (dtypemeta_wrap_legacy_descriptor(descr, name, NULL) < 0) {
         descr->type_num = -1;
         NPY_NUMUSERTYPES--;
+        PyMem_Free(name);  /* free the name only on failure */
         return -1;
     }
     if (use_void_clearimpl) {
author	Matti Picus <matti.picus@gmail.com>	2023-04-27 14:11:43 +0300
committer	GitHub <noreply@github.com>	2023-04-27 14:11:43 +0300
commit	6d34e34e7bc0dad1722958f00f3767d707c1849f (patch)
tree	782c8250bc6af42a1a514a958c3f1488abdb8ca2 /numpy/core/src/multiarray
parent	e2db7ca90b42d4cdc2c61a0ebc116879c35b7825 (diff)
parent	0dc83e270a7891ad27e38bb94d8567deaff5df51 (diff)
download	numpy-6d34e34e7bc0dad1722958f00f3767d707c1849f.tar.gz