ENH: core: Add PyArray_MinScalarType and expose it to Python

author: Mark Wiebe <mwwiebe@gmail.com> 2011-01-18 13:41:14 -0800
committer: Mark Wiebe <mwwiebe@gmail.com> 2011-01-18 13:41:14 -0800
commit: 81a28e7309e13f0a22464697b14c2c7d4c272ea5 (patch)
tree: 5830c6981c2e8bd7b23537070bbc9377ec08b16d /numpy
parent: ee06d183c407ea315b2eb3ef41ee422b0ea30251 (diff)
download: numpy-81a28e7309e13f0a22464697b14c2c7d4c272ea5.tar.gz
5 files changed, 307 insertions, 1 deletions
diff --git a/numpy/add_newdocs.py b/numpy/add_newdocs.py
index 92e438249..a3381d47e 100644
--- a/numpy/add_newdocs.py
+++ b/numpy/add_newdocs.py
@@ -1214,7 +1214,44 @@ add_newdoc('numpy.core.multiarray', 'promote_types',
     Traceback (most recent call last):
       File "<stdin>", line 1, in <module>
     TypeError: invalid type promotion
+    """)
+
+add_newdoc('numpy.core.multiarray', 'min_scalar_type',
+    """
+    min_scalar_type(a)
 
+    For scalar ``a``, returns the data type with the smallest size
+    and smallest scalar kind which can hold its value.  For non-scalar
+    array ``a``, returns the array's dtype unmodified.
+
+    As a special case, floating point values are not reduced to integers.
+
+    Parameters
+    ----------
+    a : scalar or array_like
+        The value whose minimal data type is to be found.
+
+    Returns
+    -------
+    out : dtype
+        The minimal data type.
+
+    Examples
+    --------
+    >>> np.min_scalar_type(10)
+    dtype('uint8')
+
+    >>> np.min_scalar_type(-260)
+    dtype('int16')
+
+    >>> np.min_scalar_type(3.1)
+    dtype('float16')
+
+    >>> np.min_scalar_type(1e50)
+    dtype('float64')
+
+    >>> np.min_scalar_type(np.arange(4,dtype='f8'))
+    dtype('float64')
 
     """)
 
diff --git a/numpy/core/code_generators/numpy_api.py b/numpy/core/code_generators/numpy_api.py
index c3d639f90..341ff2e5e 100644
--- a/numpy/core/code_generators/numpy_api.py
+++ b/numpy/core/code_generators/numpy_api.py
@@ -295,6 +295,7 @@ multiarray_funcs_api = {
     'PyArray_CastingConverter':             261,
     'PyArray_CountNonzero':                 262,
     'PyArray_PromoteTypes':                 263,
+    'PyArray_MinScalarType':                264,
 }
 
 ufunc_types_api = {
diff --git a/numpy/core/numeric.py b/numpy/core/numeric.py
index 4648f4bb0..a39ef62bd 100644
--- a/numpy/core/numeric.py
+++ b/numpy/core/numeric.py
@@ -3,7 +3,7 @@ __all__ = ['newaxis', 'ndarray', 'flatiter', 'newiter', 'nested_iters', 'ufunc',
            'dtype', 'fromstring', 'fromfile', 'frombuffer',
            'int_asbuffer', 'where', 'argwhere',
            'concatenate', 'fastCopyAndTranspose', 'lexsort',
-           'set_numeric_ops', 'can_cast', 'promote_types',
+           'set_numeric_ops', 'can_cast', 'promote_types', 'min_scalar_type',
            'asarray', 'asanyarray', 'ascontiguousarray', 'asfortranarray',
            'isfortran', 'empty_like', 'zeros_like',
            'correlate', 'convolve', 'inner', 'dot', 'outer', 'vdot',
@@ -213,6 +213,7 @@ fastCopyAndTranspose = multiarray._fastCopyAndTranspose
 set_numeric_ops = multiarray.set_numeric_ops
 can_cast = multiarray.can_cast
 promote_types = multiarray.promote_types
+min_scalar_type = multiarray.min_scalar_type
 lexsort = multiarray.lexsort
 compare_chararrays = multiarray.compare_chararrays
 putmask = multiarray.putmask
diff --git a/numpy/core/src/multiarray/convert_datatype.c b/numpy/core/src/multiarray/convert_datatype.c
index abebda8a7..6d6ec7528 100644
--- a/numpy/core/src/multiarray/convert_datatype.c
+++ b/numpy/core/src/multiarray/convert_datatype.c
@@ -397,6 +397,249 @@ PyArray_PromoteTypes(PyArray_Descr *type1, PyArray_Descr *type2)
     return NULL;
 }
 
+/*
+ * Finds the smallest type number able to hold the type_num value stored
+ * at valueptr.  NOTE: unlikely to be a performance problem, but if it is,
+ * it could revert to the previous simple positive/negative check.
+ */
+static int min_scalar_type_num(char *valueptr, int type_num)
+{
+    switch (type_num) {
+        case NPY_BOOL: {
+            return NPY_BOOL;
+        }
+        case NPY_UBYTE: {
+            return NPY_UBYTE;
+        }
+        case NPY_BYTE: {
+            npy_byte value = *(npy_byte *)valueptr; /* char may be unsigned */
+            if (value >= 0) {
+                return NPY_UBYTE;
+            }
+            break;
+        }
+        case NPY_USHORT: {
+            npy_ushort value = *(npy_ushort *)valueptr;
+            if (value <= NPY_MAX_UBYTE) {
+                return NPY_UBYTE;
+            }
+            break;
+        }
+        case NPY_SHORT: {
+            npy_short value = *(npy_short *)valueptr;
+            if (value >= 0) {
+                return min_scalar_type_num(valueptr, NPY_USHORT);
+            }
+            else if (value >= NPY_MIN_BYTE) {
+                return NPY_BYTE;
+            }
+            break;
+        }
+#if NPY_SIZEOF_LONG == NPY_SIZEOF_INT
+        case NPY_ULONG:
+#endif
+        case NPY_UINT: {
+            npy_uint value = *(npy_uint *)valueptr;
+            if (value <= NPY_MAX_UBYTE) {
+                return NPY_UBYTE;
+            }
+            else if (value <= NPY_MAX_USHORT) {
+                return NPY_USHORT;
+            }
+            break;
+        }
+#if NPY_SIZEOF_LONG == NPY_SIZEOF_INT
+        case NPY_LONG:
+#endif
+        case NPY_INT: {
+            npy_int value = *(npy_int *)valueptr;
+            if (value >= 0) {
+                return min_scalar_type_num(valueptr, NPY_UINT);
+            }
+            else if (value >= NPY_MIN_BYTE) {
+                return NPY_BYTE;
+            }
+            else if (value >= NPY_MIN_SHORT) {
+                return NPY_SHORT;
+            }
+            break;
+        }
+#if NPY_SIZEOF_LONG != NPY_SIZEOF_INT && NPY_SIZEOF_LONG != NPY_SIZEOF_LONGLONG
+        case NPY_ULONG: {
+            npy_ulong value = *(npy_ulong *)valueptr;
+            if (value <= NPY_MAX_UBYTE) {
+                return NPY_UBYTE;
+            }
+            else if (value <= NPY_MAX_USHORT) {
+                return NPY_USHORT;
+            }
+            else if (value <= NPY_MAX_UINT) {
+                return NPY_UINT;
+            }
+            break;
+        }
+        case NPY_LONG: {
+            npy_long value = *(npy_long *)valueptr;
+            if (value >= 0) {
+                return min_scalar_type_num(valueptr, NPY_ULONG);
+            }
+            else if (value >= NPY_MIN_BYTE) {
+                return NPY_BYTE;
+            }
+            else if (value >= NPY_MIN_SHORT) {
+                return NPY_SHORT;
+            }
+            else if (value >= NPY_MIN_INT) {
+                return NPY_INT;
+            }
+            break;
+        }
+#endif
+#if NPY_SIZEOF_LONG == NPY_SIZEOF_LONGLONG
+        case NPY_ULONG:
+#endif
+        case NPY_ULONGLONG: {
+            npy_ulonglong value = *(npy_ulonglong *)valueptr;
+            if (value <= NPY_MAX_UBYTE) {
+                return NPY_UBYTE;
+            }
+            else if (value <= NPY_MAX_USHORT) {
+                return NPY_USHORT;
+            }
+            else if (value <= NPY_MAX_UINT) {
+                return NPY_UINT;
+            }
+#if NPY_SIZEOF_LONG != NPY_SIZEOF_INT && NPY_SIZEOF_LONG != NPY_SIZEOF_LONGLONG
+            else if (value <= NPY_MAX_ULONG) {
+                return NPY_ULONG;
+            }
+#endif
+            break;
+        }
+#if NPY_SIZEOF_LONG == NPY_SIZEOF_LONGLONG
+        case NPY_LONG:
+#endif
+        case NPY_LONGLONG: {
+            npy_longlong value = *(npy_longlong *)valueptr;
+            if (value >= 0) {
+                return min_scalar_type_num(valueptr, NPY_ULONGLONG);
+            }
+            else if (value >= NPY_MIN_BYTE) {
+                return NPY_BYTE;
+            }
+            else if (value >= NPY_MIN_SHORT) {
+                return NPY_SHORT;
+            }
+            else if (value >= NPY_MIN_INT) {
+                return NPY_INT;
+            }
+#if NPY_SIZEOF_LONG != NPY_SIZEOF_INT && NPY_SIZEOF_LONG != NPY_SIZEOF_LONGLONG
+            else if (value >= NPY_MIN_LONG) {
+                return NPY_LONG;
+            }
+#endif
+            break;
+        }
+        /*
+         * Float types aren't allowed to be demoted to integer types,
+         * but precision loss is allowed.
+         */
+        case NPY_HALF: {
+            return NPY_HALF;
+        }
+        case NPY_FLOAT: {
+            float value = *(float *)valueptr;
+            if (value > -65000 && value < 65000) {
+                return NPY_HALF;
+            }
+            break;
+        }
+        case NPY_DOUBLE: {
+            double value = *(double *)valueptr;
+            if (value > -65000 && value < 65000) {
+                return NPY_HALF;
+            }
+            else if (value > -3.4e38 && value < 3.4e38) {
+                return NPY_FLOAT;
+            }
+            break;
+        }
+        case NPY_LONGDOUBLE: {
+            npy_longdouble value = *(npy_longdouble *)valueptr;
+            if (value > -65000 && value < 65000) {
+                return NPY_HALF;
+            }
+            else if (value > -3.4e38 && value < 3.4e38) {
+                return NPY_FLOAT;
+            }
+            else if (value > -1.7e308 && value < 1.7e308) {
+                return NPY_DOUBLE;
+            }
+            break;
+        }
+        /*
+         * Complex types may be demoted to float types if the
+         * imaginary part is zero.
+         */
+        case NPY_CFLOAT: {
+            npy_cfloat value = *(npy_cfloat *)valueptr;
+            if (value.imag == 0) {
+                return min_scalar_type_num((char *)&value.real, NPY_FLOAT);
+            }
+            break;
+        }
+        case NPY_CDOUBLE: {
+            npy_cdouble value = *(npy_cdouble *)valueptr;
+            if (value.imag == 0) {
+                return min_scalar_type_num((char *)&value.real, NPY_DOUBLE);
+            }
+            /* TODO: Check overflow values as for float case */
+            return NPY_CFLOAT;
+        }
+        case NPY_CLONGDOUBLE: {
+            npy_clongdouble value = *(npy_clongdouble *)valueptr;
+            if (value.imag == 0) {
+                return min_scalar_type_num((char *)&value.real, NPY_LONGDOUBLE);
+            }
+            /* TODO: Check overflow values as for float case */
+            return NPY_CFLOAT;
+        }
+    }
+
+    return type_num;
+}
+
+/*NUMPY_API
+ * For a 0-dimensional arr with a built-in number data type, returns the
+ * data type with the smallest size/kind that can still represent the
+ * value stored in arr.  For every other arr, returns a new reference to
+ * the array's own data type.
+ */
+NPY_NO_EXPORT PyArray_Descr *
+PyArray_MinScalarType(PyArrayObject *arr)
+{
+    PyArray_Descr *descr = PyArray_DESCR(arr);
+    int keep_descr = PyArray_NDIM(arr) > 0 ||
+                     !PyTypeNum_ISNUMBER(descr->type_num);
+    int min_type_num;
+    /* Aligned scratch space big enough for any built-in number type */
+#if NPY_SIZEOF_LONGLONG >= NPY_SIZEOF_CLONGDOUBLE
+    npy_longlong scratch;
+#else
+    npy_clongdouble scratch;
+#endif
+
+    if (keep_descr) {
+        Py_INCREF(descr);
+        return descr;
+    }
+    /* Copy the value out, byte-swapping if arr is not in native order */
+    descr->f->copyswap(&scratch, PyArray_BYTES(arr),
+                       !PyArray_ISNBO(descr->byteorder), NULL);
+    min_type_num = min_scalar_type_num((char *)&scratch, descr->type_num);
+    return PyArray_DescrFromType(min_type_num);
+}
+
 /*NUMPY_API
  * Is the typenum valid?
  */
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index d558cbd04..ef4442368 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -2238,6 +2238,27 @@ array_promote_types(PyObject *NPY_UNUSED(dummy), PyObject *args)
     return ret;
 }
 
+/* Implements min_scalar_type(a) on top of PyArray_MinScalarType. */
+static PyObject *
+array_min_scalar_type(PyObject *NPY_UNUSED(dummy), PyObject *args)
+{
+    PyObject *obj = NULL;
+    PyObject *result = NULL;
+    PyArrayObject *arr;
+
+    if (!PyArg_ParseTuple(args, "O", &obj)) {
+        return NULL;
+    }
+
+    /* Convert the argument to an array; NULL here propagates the error */
+    arr = (PyArrayObject *)PyArray_FromAny(obj, NULL, 0, 0, 0, NULL);
+    if (arr != NULL) {
+        result = (PyObject *)PyArray_MinScalarType(arr);
+        Py_DECREF(arr);
+    }
+    return result;
+}
+
 #if !defined(NPY_PY3K)
 static PyObject *
 new_buffer(PyObject *NPY_UNUSED(dummy), PyObject *args)
@@ -2846,6 +2867,9 @@ static struct PyMethodDef array_module_methods[] = {
     {"promote_types",
         (PyCFunction)array_promote_types,
         METH_VARARGS, NULL},
+    {"min_scalar_type",
+        (PyCFunction)array_min_scalar_type,
+        METH_VARARGS, NULL},
 #if !defined(NPY_PY3K)
     {"newbuffer",
         (PyCFunction)new_buffer,
author	Mark Wiebe <mwwiebe@gmail.com>	2011-01-18 13:41:14 -0800
committer	Mark Wiebe <mwwiebe@gmail.com>	2011-01-18 13:41:14 -0800
commit	81a28e7309e13f0a22464697b14c2c7d4c272ea5 (patch)
tree	5830c6981c2e8bd7b23537070bbc9377ec08b16d /numpy
parent	ee06d183c407ea315b2eb3ef41ee422b0ea30251 (diff)
download	numpy-81a28e7309e13f0a22464697b14c2c7d4c272ea5.tar.gz