diff options
author | Sebastian Berg <sebastian@sipsolutions.net> | 2020-02-03 16:17:26 -0800 |
---|---|---|
committer | Sebastian Berg <sebastian@sipsolutions.net> | 2020-02-06 20:10:40 -0800 |
commit | 1a1611a33cfb5ea50d16d20affa5c6fa03e148d7 (patch) | |
tree | fb55b590501702b096a1fb3833c90589de1d86bb /numpy/core/defchararray.py | |
parent | dae4f67c797176c66281101be8f3b4d6c424735c (diff) | |
download | numpy-1a1611a33cfb5ea50d16d20affa5c6fa03e148d7.tar.gz |
DEP: Do not allow "abstract" dtype conversion/creation
These dtypes do not really make sense as instances. We can (somewhat)
reasonably define np.dtype(np.int64) as the default (machine endianness)
int64. (Arguably, it is unclear that `np.array(arr_of_>f8, dtype="f")`
should return arr_of_<f8, but that would be very noisy!)
However, `np.integer` as an equivalent to long is not well defined.
Similarly, `dtype=Decimal` may be neat to spell `dtype=object` when you
intend to put Decimal objects into the array. But it is misleading,
since there is no special meaning to it at this time.
The biggest issue with it is that `arr.astype(np.floating)` looks
like it will let float32 or float128 pass, but it will force a
float64 output! Arguably downcasting is a bug in this case.
A related issue is `np.dtype("S")` and especially "S0". The dtype "S"
does make sense for most or all places where `dtype=...` can be
passed. However, it is conceptually different from other dtypes, since
it will not end up being attached to the array (unlike "S2" which
would be). The dtype "S" really means the type number/DType class
of String, and not a specific dtype instance.
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r-- | numpy/core/defchararray.py | 18 |
1 files changed, 11 insertions, 7 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 942a698a9..26a9013e6 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -17,7 +17,8 @@ The preferred alias for `defchararray` is `numpy.char`. """ import functools import sys -from .numerictypes import string_, unicode_, integer, object_, bool_, character +from .numerictypes import ( + string_, unicode_, integer, int_, object_, bool_, character) from .numeric import ndarray, compare_chararrays from .numeric import array as narray from numpy.core.multiarray import _vec_string @@ -276,7 +277,10 @@ def str_len(a): -------- builtins.len """ - return _vec_string(a, integer, '__len__') + # Note: __len__, etc. currently return ints, which are not C-integers. + # Generally intp would be expected for lengths, although int is sufficient + # due to the dtype itemsize limitation. + return _vec_string(a, int_, '__len__') @array_function_dispatch(_binary_op_dispatcher) @@ -500,7 +504,7 @@ def count(a, sub, start=0, end=None): array([1, 0, 0]) """ - return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end)) + return _vec_string(a, int_, 'count', [sub, start] + _clean_args(end)) def _code_dispatcher(a, encoding=None, errors=None): @@ -710,7 +714,7 @@ def find(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'find', [sub, start] + _clean_args(end)) + a, int_, 'find', [sub, start] + _clean_args(end)) @array_function_dispatch(_count_dispatcher) @@ -739,7 +743,7 @@ def index(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'index', [sub, start] + _clean_args(end)) + a, int_, 'index', [sub, start] + _clean_args(end)) @array_function_dispatch(_unary_op_dispatcher) @@ -1199,7 +1203,7 @@ def rfind(a, sub, start=0, end=None): """ return _vec_string( - a, integer, 'rfind', [sub, start] + _clean_args(end)) + a, int_, 'rfind', [sub, start] + _clean_args(end)) @array_function_dispatch(_count_dispatcher) @@ -1229,7 +1233,7 @@ def rindex(a, sub, 
start=0, end=None): """ return _vec_string( - a, integer, 'rindex', [sub, start] + _clean_args(end)) + a, int_, 'rindex', [sub, start] + _clean_args(end)) @array_function_dispatch(_just_dispatcher) |