Merge branch 'main' into cython3_noexcept

author: scoder <stefan_ml@behnel.de> 2023-05-04 09:29:53 +0200
committer: GitHub <noreply@github.com> 2023-05-04 09:29:53 +0200
commit: 442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
tree: d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/core/defchararray.py
parent: 3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent: 9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
download: numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz
1 files changed, 304 insertions, 185 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 1d447b86a..11c5a30bf 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -6,7 +6,7 @@ operations and methods.
    The `chararray` class exists for backwards compatibility with
    Numarray, it is not recommended for new development. Starting from numpy
    1.4, if one needs arrays of strings, it is recommended to use arrays of
-   `dtype` `object_`, `string_` or `unicode_`, and use the free functions
+   `dtype` `object_`, `bytes_` or `str_`, and use the free functions
    in the `numpy.char` module for fast vectorized string operations.
 
 Some methods will only be available if the corresponding string method is
@@ -16,13 +16,13 @@ The preferred alias for `defchararray` is `numpy.char`.
 
 """
 import functools
-import sys
+
+from .._utils import set_module
 from .numerictypes import (
-    string_, unicode_, integer, int_, object_, bool_, character)
+    bytes_, str_, integer, int_, object_, bool_, character)
 from .numeric import ndarray, compare_chararrays
 from .numeric import array as narray
 from numpy.core.multiarray import _vec_string
-from numpy.core.overrides import set_module
 from numpy.core import overrides
 from numpy.compat import asbytes
 import numpy
@@ -46,26 +46,29 @@ array_function_dispatch = functools.partial(
     overrides.array_function_dispatch, module='numpy.char')
 
 
-def _use_unicode(*args):
-    """
-    Helper function for determining the output type of some string
-    operations.
+def _is_unicode(arr):
+    """Returns True if arr is a string or a string array with a dtype that
+    represents a unicode string, otherwise returns False.
 
-    For an operation on two ndarrays, if at least one is unicode, the
-    result should be unicode.
     """
-    for x in args:
-        if (isinstance(x, str) or
-                issubclass(numpy.asarray(x).dtype.type, unicode_)):
-            return unicode_
-    return string_
+    if (isinstance(arr, str) or
+            issubclass(numpy.asarray(arr).dtype.type, str)):
+        return True
+    return False
+
 
-def _to_string_or_unicode_array(result):
+def _to_bytes_or_str_array(result, output_dtype_like=None):
     """
-    Helper function to cast a result back into a string or unicode array
-    if an object array must be used as an intermediary.
+    Helper function to cast a result back into an array
+    with the appropriate dtype if an object array must be used
+    as an intermediary.
     """
-    return numpy.asarray(result.tolist())
+    ret = numpy.asarray(result.tolist())
+    dtype = getattr(output_dtype_like, 'dtype', None)
+    if dtype is not None:
+        return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False)
+    return ret
+
 
 def _clean_args(*args):
     """
@@ -89,7 +92,7 @@ def _get_num_chars(a):
     a string or unicode array.  This is to abstract out the fact that
     for a unicode array this is itemsize / 4.
     """
-    if issubclass(a.dtype.type, unicode_):
+    if issubclass(a.dtype.type, str_):
         return a.itemsize // 4
     return a.itemsize
 
@@ -114,8 +117,8 @@ def equal(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -140,8 +143,8 @@ def not_equal(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -167,8 +170,8 @@ def greater_equal(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -193,8 +196,8 @@ def less_equal(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -219,8 +222,8 @@ def greater(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -245,8 +248,8 @@ def less(x1, x2):
 
     Returns
     -------
-    out : ndarray or bool
-        Output array of bools, or a single bool if x1 and x2 are scalars.
+    out : ndarray
+        Output array of bools.
 
     See Also
     --------
@@ -273,9 +276,21 @@ def str_len(a):
     out : ndarray
         Output array of integers
 
-    See also
+    See Also
+    --------
+    len
+
+    Examples
     --------
-    builtins.len
+    >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
+    >>> np.char.str_len(a)
+    array([23, 15])
+    >>> a = np.array([u'\u0420', u'\u043e'])
+    >>> np.char.str_len(a)
+    array([1, 1])
+    >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
+    >>> np.char.str_len(a)
+    array([[5, 5], [1, 1]])
     """
     # Note: __len__, etc. currently return ints, which are not C-integers.
     # Generally intp would be expected for lengths, although int is sufficient
@@ -300,16 +315,26 @@ def add(x1, x2):
     Returns
     -------
     add : ndarray
-        Output array of `string_` or `unicode_`, depending on input types
+        Output array of `bytes_` or `str_`, depending on input types
         of the same shape as `x1` and `x2`.
 
     """
     arr1 = numpy.asarray(x1)
     arr2 = numpy.asarray(x2)
     out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
-    dtype = _use_unicode(arr1, arr2)
-    return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
 
+    if type(arr1.dtype) != type(arr2.dtype):
+        # Enforce this for now.  The solution to it will be implement add
+        # as a ufunc.  It never worked right on Python 3: bytes + unicode gave
+        # nonsense unicode + bytes errored, and unicode + object used the
+        # object dtype itemsize as num chars (worked on short strings).
+        # bytes + void worked but promoting void->bytes is dubious also.
+        raise TypeError(
+            "np.char.add() requires both arrays of the same dtype kind, but "
+            f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases "
+            "where this used to work often lead to incorrect results).")
+
+    return _vec_string(arr1, type(arr1.dtype)(out_size), '__add__', (arr2,))
 
 def _multiply_dispatcher(a, i):
     return (a,)
@@ -334,7 +359,24 @@ def multiply(a, i):
     -------
     out : ndarray
         Output array of str or unicode, depending on input types
-
+    
+    Examples
+    --------
+    >>> a = np.array(["a", "b", "c"])
+    >>> np.char.multiply(x, 3)
+    array(['aaa', 'bbb', 'ccc'], dtype='<U3')
+    >>> i = np.array([1, 2, 3])
+    >>> np.char.multiply(a, i)
+    array(['a', 'bb', 'ccc'], dtype='<U3')
+    >>> np.char.multiply(np.array(['a']), i)
+    array(['a', 'aa', 'aaa'], dtype='<U3')
+    >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
+    >>> np.char.multiply(a, 3)
+    array([['aaa', 'bbb', 'ccc'],
+           ['ddd', 'eee', 'fff']], dtype='<U3')
+    >>> np.char.multiply(a, i)
+    array([['a', 'bb', 'ccc'],
+           ['d', 'ee', 'fff']], dtype='<U3')
     """
     a_arr = numpy.asarray(a)
     i_arr = numpy.asarray(i)
@@ -342,7 +384,7 @@ def multiply(a, i):
         raise ValueError("Can only multiply by integers")
     out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
     return _vec_string(
-        a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
+        a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))
 
 
 def _mod_dispatcher(a, values):
@@ -368,13 +410,13 @@ def mod(a, values):
     out : ndarray
         Output array of str or unicode, depending on input types
 
-    See also
+    See Also
     --------
     str.__mod__
 
     """
-    return _to_string_or_unicode_array(
-        _vec_string(a, object_, '__mod__', (values,)))
+    return _to_bytes_or_str_array(
+        _vec_string(a, object_, '__mod__', (values,)), a)
 
 
 @array_function_dispatch(_unary_op_dispatcher)
@@ -398,7 +440,7 @@ def capitalize(a):
         Output array of str or unicode, depending on input
         types
 
-    See also
+    See Also
     --------
     str.capitalize
 
@@ -443,18 +485,34 @@ def center(a, width, fillchar=' '):
         Output array of str or unicode, depending on input
         types
 
-    See also
+    See Also
     --------
     str.center
+    
+    Notes
+    -----
+    This function is intended to work with arrays of strings.  The
+    fill character is not applied to numeric types.
+
+    Examples
+    --------
+    >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
+    array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
+    >>> np.char.center(c, width=9)
+    array(['   a1b2  ', '   1b2a  ', '   b2a1  ', '   2a1b  '], dtype='<U9')
+    >>> np.char.center(c, width=9, fillchar='*')
+    array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
+    >>> np.char.center(c, width=1)
+    array(['a', '1', 'b', '2'], dtype='<U1')
 
     """
     a_arr = numpy.asarray(a)
     width_arr = numpy.asarray(width)
     size = int(numpy.max(width_arr.flat))
-    if numpy.issubdtype(a_arr.dtype, numpy.string_):
+    if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
         fillchar = asbytes(fillchar)
     return _vec_string(
-        a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+        a_arr, type(a_arr.dtype)(size), 'center', (width_arr, fillchar))
 
 
 def _count_dispatcher(a, sub, start=None, end=None):
@@ -485,7 +543,7 @@ def count(a, sub, start=0, end=None):
     out : ndarray
         Output array of ints.
 
-    See also
+    See Also
     --------
     str.count
 
@@ -513,8 +571,8 @@ def _code_dispatcher(a, encoding=None, errors=None):
 
 @array_function_dispatch(_code_dispatcher)
 def decode(a, encoding=None, errors=None):
-    """
-    Calls `str.decode` element-wise.
+    r"""
+    Calls ``bytes.decode`` element-wise.
 
     The set of available codecs comes from the Python standard library,
     and may be extended at runtime.  For more information, see the
@@ -534,9 +592,9 @@ def decode(a, encoding=None, errors=None):
     -------
     out : ndarray
 
-    See also
+    See Also
     --------
-    str.decode
+    :py:meth:`bytes.decode`
 
     Notes
     -----
@@ -544,16 +602,16 @@ def decode(a, encoding=None, errors=None):
 
     Examples
     --------
-    >>> c = np.array(['aAaAaA', '  aA  ', 'abBABba'])
+    >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
+    ...               b'\x81\x82\xc2\xc1\xc2\x82\x81'])
     >>> c
+    array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
+    ...    b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
+    >>> np.char.decode(c, encoding='cp037')
     array(['aAaAaA', '  aA  ', 'abBABba'], dtype='<U7')
-    >>> np.char.encode(c, encoding='cp037')
-    array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
-        '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
-        dtype='|S7')
 
     """
-    return _to_string_or_unicode_array(
+    return _to_bytes_or_str_array(
         _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
 
 
@@ -580,7 +638,7 @@ def encode(a, encoding=None, errors=None):
     -------
     out : ndarray
 
-    See also
+    See Also
     --------
     str.encode
 
@@ -589,7 +647,7 @@ def encode(a, encoding=None, errors=None):
     The type of the result will depend on the encoding specified.
 
     """
-    return _to_string_or_unicode_array(
+    return _to_bytes_or_str_array(
         _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
 
 
@@ -620,7 +678,7 @@ def endswith(a, suffix, start=0, end=None):
     out : ndarray
         Outputs an array of bools.
 
-    See also
+    See Also
     --------
     str.endswith
 
@@ -672,13 +730,13 @@ def expandtabs(a, tabsize=8):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.expandtabs
 
     """
-    return _to_string_or_unicode_array(
-        _vec_string(a, object_, 'expandtabs', (tabsize,)))
+    return _to_bytes_or_str_array(
+        _vec_string(a, object_, 'expandtabs', (tabsize,)), a)
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -708,10 +766,16 @@ def find(a, sub, start=0, end=None):
     out : ndarray or int
         Output array of ints.  Returns -1 if `sub` is not found.
 
-    See also
+    See Also
     --------
     str.find
 
+    Examples
+    --------
+    >>> a = np.array(["NumPy is a Python library"])
+    >>> np.char.find(a, "Python", start=0, end=None)
+    array([11])
+
     """
     return _vec_string(
         a, int_, 'find', [sub, start] + _clean_args(end))
@@ -737,10 +801,16 @@ def index(a, sub, start=0, end=None):
     out : ndarray
         Output array of ints.  Returns -1 if `sub` is not found.
 
-    See also
+    See Also
     --------
     find, str.find
 
+    Examples
+    --------
+    >>> a = np.array(["Computer Science"])
+    >>> np.char.index(a, "Science", start=0, end=None)
+    array([9])
+
     """
     return _vec_string(
         a, int_, 'index', [sub, start] + _clean_args(end))
@@ -765,7 +835,7 @@ def isalnum(a):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.isalnum
     """
@@ -791,7 +861,7 @@ def isalpha(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.isalpha
     """
@@ -817,9 +887,18 @@ def isdigit(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.isdigit
+
+    Examples
+    --------
+    >>> a = np.array(['a', 'b', '0'])
+    >>> np.char.isdigit(a)
+    array([False, False,  True])
+    >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
+    >>> np.char.isdigit(a)
+    array([[False, False,  True], [False,  True,  True]])
     """
     return _vec_string(a, bool_, 'isdigit')
 
@@ -844,7 +923,7 @@ def islower(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.islower
     """
@@ -871,7 +950,7 @@ def isspace(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.isspace
     """
@@ -897,7 +976,7 @@ def istitle(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.istitle
     """
@@ -907,7 +986,7 @@ def istitle(a):
 @array_function_dispatch(_unary_op_dispatcher)
 def isupper(a):
     """
-    Returns true for each element if all cased characters in the
+    Return true for each element if all cased characters in the
     string are uppercase and there is at least one character, false
     otherwise.
 
@@ -924,9 +1003,19 @@ def isupper(a):
     out : ndarray
         Output array of bools
 
-    See also
+    See Also
     --------
     str.isupper
+
+    Examples
+    --------
+    >>> str = "GHC"
+    >>> np.char.isupper(str)
+    array(True)     
+    >>> a = np.array(["hello", "HELLO", "Hello"])
+    >>> np.char.isupper(a)
+    array([False,  True, False]) 
+
     """
     return _vec_string(a, bool_, 'isupper')
 
@@ -953,12 +1042,21 @@ def join(sep, seq):
     out : ndarray
         Output array of str or unicode, depending on input types
 
-    See also
+    See Also
     --------
     str.join
+
+    Examples
+    --------
+    >>> np.char.join('-', 'osd')
+    array('o-s-d', dtype='<U5')
+
+    >>> np.char.join(['-', '.'], ['ghc', 'osd'])
+    array(['g-h-c', 'o.s.d'], dtype='<U5')
+
     """
-    return _to_string_or_unicode_array(
-        _vec_string(sep, object_, 'join', (seq,)))
+    return _to_bytes_or_str_array(
+        _vec_string(sep, object_, 'join', (seq,)), seq)
 
 
 
@@ -988,7 +1086,7 @@ def ljust(a, width, fillchar=' '):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.ljust
 
@@ -996,10 +1094,10 @@ def ljust(a, width, fillchar=' '):
     a_arr = numpy.asarray(a)
     width_arr = numpy.asarray(width)
     size = int(numpy.max(width_arr.flat))
-    if numpy.issubdtype(a_arr.dtype, numpy.string_):
+    if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
         fillchar = asbytes(fillchar)
     return _vec_string(
-        a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+        a_arr, type(a_arr.dtype)(size), 'ljust', (width_arr, fillchar))
 
 
 @array_function_dispatch(_unary_op_dispatcher)
@@ -1021,7 +1119,7 @@ def lower(a):
     out : ndarray, {str, unicode}
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.lower
 
@@ -1066,7 +1164,7 @@ def lstrip(a, chars=None):
     out : ndarray, {str, unicode}
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.lstrip
 
@@ -1127,13 +1225,13 @@ def partition(a, sep):
         The output array will have an extra dimension with 3
         elements per input element.
 
-    See also
+    See Also
     --------
     str.partition
 
     """
-    return _to_string_or_unicode_array(
-        _vec_string(a, object_, 'partition', (sep,)))
+    return _to_bytes_or_str_array(
+        _vec_string(a, object_, 'partition', (sep,)), a)
 
 
 def _replace_dispatcher(a, old, new, count=None):
@@ -1163,14 +1261,22 @@ def replace(a, old, new, count=None):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.replace
+    
+    Examples
+    --------
+    >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
+    >>> np.char.replace(a, 'mango', 'banana')
+    array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')
 
+    >>> a = np.array(["The dish is fresh", "This is it"])
+    >>> np.char.replace(a, 'is', 'was')
+    array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
     """
-    return _to_string_or_unicode_array(
-        _vec_string(
-            a, object_, 'replace', [old, new] + _clean_args(count)))
+    return _to_bytes_or_str_array(
+        _vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
 
 
 @array_function_dispatch(_count_dispatcher)
@@ -1197,7 +1303,7 @@ def rfind(a, sub, start=0, end=None):
     out : ndarray
        Output array of ints.  Return -1 on failure.
 
-    See also
+    See Also
     --------
     str.rfind
 
@@ -1227,7 +1333,7 @@ def rindex(a, sub, start=0, end=None):
     out : ndarray
        Output array of ints.
 
-    See also
+    See Also
     --------
     rfind, str.rindex
 
@@ -1258,7 +1364,7 @@ def rjust(a, width, fillchar=' '):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.rjust
 
@@ -1266,10 +1372,10 @@ def rjust(a, width, fillchar=' '):
     a_arr = numpy.asarray(a)
     width_arr = numpy.asarray(width)
     size = int(numpy.max(width_arr.flat))
-    if numpy.issubdtype(a_arr.dtype, numpy.string_):
+    if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
         fillchar = asbytes(fillchar)
     return _vec_string(
-        a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
+        a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar))
 
 
 @array_function_dispatch(_partition_dispatcher)
@@ -1299,13 +1405,13 @@ def rpartition(a, sep):
         type.  The output array will have an extra dimension with
         3 elements per input element.
 
-    See also
+    See Also
     --------
     str.rpartition
 
     """
-    return _to_string_or_unicode_array(
-        _vec_string(a, object_, 'rpartition', (sep,)))
+    return _to_bytes_or_str_array(
+        _vec_string(a, object_, 'rpartition', (sep,)), a)
 
 
 def _split_dispatcher(a, sep=None, maxsplit=None):
@@ -1339,7 +1445,7 @@ def rsplit(a, sep=None, maxsplit=None):
     out : ndarray
        Array of list objects
 
-    See also
+    See Also
     --------
     str.rsplit, split
 
@@ -1378,7 +1484,7 @@ def rstrip(a, chars=None):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.rstrip
 
@@ -1423,7 +1529,7 @@ def split(a, sep=None, maxsplit=None):
     out : ndarray
         Array of list objects
 
-    See also
+    See Also
     --------
     str.split, rsplit
 
@@ -1459,7 +1565,7 @@ def splitlines(a, keepends=None):
     out : ndarray
         Array of list objects
 
-    See also
+    See Also
     --------
     str.splitlines
 
@@ -1495,7 +1601,7 @@ def startswith(a, prefix, start=0, end=None):
     out : ndarray
         Array of booleans
 
-    See also
+    See Also
     --------
     str.startswith
 
@@ -1528,7 +1634,7 @@ def strip(a, chars=None):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.strip
 
@@ -1569,7 +1675,7 @@ def swapcase(a):
     out : ndarray, {str, unicode}
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.swapcase
 
@@ -1609,7 +1715,7 @@ def title(a):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.title
 
@@ -1654,13 +1760,13 @@ def translate(a, table, deletechars=None):
     out : ndarray
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.translate
 
     """
     a_arr = numpy.asarray(a)
-    if issubclass(a_arr.dtype.type, unicode_):
+    if issubclass(a_arr.dtype.type, str_):
         return _vec_string(
             a_arr, a_arr.dtype, 'translate', (table,))
     else:
@@ -1687,7 +1793,7 @@ def upper(a):
     out : ndarray, {str, unicode}
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.upper
 
@@ -1726,7 +1832,7 @@ def zfill(a, width):
     out : ndarray, {str, unicode}
         Output array of str or unicode, depending on input type
 
-    See also
+    See Also
     --------
     str.zfill
 
@@ -1735,7 +1841,7 @@ def zfill(a, width):
     width_arr = numpy.asarray(width)
     size = int(numpy.max(width_arr.flat))
     return _vec_string(
-        a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+        a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
 
 
 @array_function_dispatch(_unary_op_dispatcher)
@@ -1744,7 +1850,7 @@ def isnumeric(a):
     For each element, return True if there are only numeric
     characters in the element.
 
-    Calls `unicode.isnumeric` element-wise.
+    Calls `str.isnumeric` element-wise.
 
     Numeric characters include digit characters, and all characters
     that have the Unicode numeric value property, e.g. ``U+2155,
@@ -1760,12 +1866,17 @@ def isnumeric(a):
     out : ndarray, bool
         Array of booleans of same shape as `a`.
 
-    See also
+    See Also
+    --------
+    str.isnumeric
+
+    Examples
     --------
-    unicode.isnumeric
+    >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
+    array([ True, False, False, False, False])
 
     """
-    if _use_unicode(a) != unicode_:
+    if not _is_unicode(a):
         raise TypeError("isnumeric is only available for Unicode strings and arrays")
     return _vec_string(a, bool_, 'isnumeric')
 
@@ -1776,7 +1887,7 @@ def isdecimal(a):
     For each element, return True if there are only decimal
     characters in the element.
 
-    Calls `unicode.isdecimal` element-wise.
+    Calls `str.isdecimal` element-wise.
 
     Decimal characters include digit characters, and all characters
     that can be used to form decimal-radix numbers,
@@ -1792,13 +1903,19 @@ def isdecimal(a):
     out : ndarray, bool
         Array of booleans identical in shape to `a`.
 
-    See also
+    See Also
     --------
-    unicode.isdecimal
+    str.isdecimal
 
-    """
-    if _use_unicode(a) != unicode_:
-        raise TypeError("isnumeric is only available for Unicode strings and arrays")
+    Examples
+    --------
+    >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
+    array([ True, False, False, False])
+
+    """ 
+    if not _is_unicode(a):
+        raise TypeError(
+            "isdecimal is only available for Unicode strings and arrays")
     return _vec_string(a, bool_, 'isdecimal')
 
 
@@ -1814,7 +1931,7 @@ class chararray(ndarray):
        The `chararray` class exists for backwards compatibility with
        Numarray, it is not recommended for new development. Starting from numpy
        1.4, if one needs arrays of strings, it is recommended to use arrays of
-       `dtype` `object_`, `string_` or `unicode_`, and use the free functions
+       `dtype` `object_`, `bytes_` or `str_`, and use the free functions
        in the `numpy.char` module for fast vectorized string operations.
 
     Versus a regular NumPy array of type `str` or `unicode`, this
@@ -1948,9 +2065,9 @@ class chararray(ndarray):
         global _globalvar
 
         if unicode:
-            dtype = unicode_
+            dtype = str_
         else:
-            dtype = string_
+            dtype = bytes_
 
         # force itemsize to be a Python int, since using NumPy integer
         # types results in itemsize.itemsize being used as the size of
@@ -2004,7 +2121,7 @@ class chararray(ndarray):
         """
         Return (self == other) element-wise.
 
-        See also
+        See Also
         --------
         equal
         """
@@ -2014,7 +2131,7 @@ class chararray(ndarray):
         """
         Return (self != other) element-wise.
 
-        See also
+        See Also
         --------
         not_equal
         """
@@ -2024,7 +2141,7 @@ class chararray(ndarray):
         """
         Return (self >= other) element-wise.
 
-        See also
+        See Also
         --------
         greater_equal
         """
@@ -2034,7 +2151,7 @@ class chararray(ndarray):
         """
         Return (self <= other) element-wise.
 
-        See also
+        See Also
         --------
         less_equal
         """
@@ -2044,7 +2161,7 @@ class chararray(ndarray):
         """
         Return (self > other) element-wise.
 
-        See also
+        See Also
         --------
         greater
         """
@@ -2054,7 +2171,7 @@ class chararray(ndarray):
         """
         Return (self < other) element-wise.
 
-        See also
+        See Also
         --------
         less
         """
@@ -2065,7 +2182,7 @@ class chararray(ndarray):
         Return (self + other), that is string concatenation,
         element-wise for a pair of array_likes of str or unicode.
 
-        See also
+        See Also
         --------
         add
         """
@@ -2074,9 +2191,9 @@ class chararray(ndarray):
     def __radd__(self, other):
         """
         Return (other + self), that is string concatenation,
-        element-wise for a pair of array_likes of `string_` or `unicode_`.
+        element-wise for a pair of array_likes of `bytes_` or `str_`.
 
-        See also
+        See Also
         --------
         add
         """
@@ -2087,7 +2204,7 @@ class chararray(ndarray):
         Return (self * i), that is string multiple concatenation,
         element-wise.
 
-        See also
+        See Also
         --------
         multiply
         """
@@ -2098,7 +2215,7 @@ class chararray(ndarray):
         Return (self * i), that is string multiple concatenation,
         element-wise.
 
-        See also
+        See Also
         --------
         multiply
         """
@@ -2107,10 +2224,10 @@ class chararray(ndarray):
     def __mod__(self, i):
         """
         Return (self % i), that is pre-Python 2.6 string formatting
-        (interpolation), element-wise for a pair of array_likes of `string_`
-        or `unicode_`.
+        (interpolation), element-wise for a pair of array_likes of `bytes_`
+        or `str_`.
 
-        See also
+        See Also
         --------
         mod
         """
@@ -2145,7 +2262,7 @@ class chararray(ndarray):
         Return a copy of `self` with only the first character of each element
         capitalized.
 
-        See also
+        See Also
         --------
         char.capitalize
 
@@ -2157,7 +2274,7 @@ class chararray(ndarray):
         Return a copy of `self` with its elements centered in a
         string of length `width`.
 
-        See also
+        See Also
         --------
         center
         """
@@ -2168,7 +2285,7 @@ class chararray(ndarray):
         Returns an array with the number of non-overlapping occurrences of
         substring `sub` in the range [`start`, `end`].
 
-        See also
+        See Also
         --------
         char.count
 
@@ -2177,9 +2294,9 @@ class chararray(ndarray):
 
     def decode(self, encoding=None, errors=None):
         """
-        Calls `str.decode` element-wise.
+        Calls ``bytes.decode`` element-wise.
 
-        See also
+        See Also
         --------
         char.decode
 
@@ -2190,7 +2307,7 @@ class chararray(ndarray):
         """
         Calls `str.encode` element-wise.
 
-        See also
+        See Also
         --------
         char.encode
 
@@ -2202,7 +2319,7 @@ class chararray(ndarray):
         Returns a boolean array which is `True` where the string element
         in `self` ends with `suffix`, otherwise `False`.
 
-        See also
+        See Also
         --------
         char.endswith
 
@@ -2214,7 +2331,7 @@ class chararray(ndarray):
         Return a copy of each string element where all tab characters are
         replaced by one or more spaces.
 
-        See also
+        See Also
         --------
         char.expandtabs
 
@@ -2226,7 +2343,7 @@ class chararray(ndarray):
         For each element, return the lowest index in the string where
         substring `sub` is found.
 
-        See also
+        See Also
         --------
         char.find
 
@@ -2237,7 +2354,7 @@ class chararray(ndarray):
         """
         Like `find`, but raises `ValueError` when the substring is not found.
 
-        See also
+        See Also
         --------
         char.index
 
@@ -2250,7 +2367,7 @@ class chararray(ndarray):
         are alphanumeric and there is at least one character, false
         otherwise.
 
-        See also
+        See Also
         --------
         char.isalnum
 
@@ -2263,7 +2380,7 @@ class chararray(ndarray):
         are alphabetic and there is at least one character, false
         otherwise.
 
-        See also
+        See Also
         --------
         char.isalpha
 
@@ -2275,7 +2392,7 @@ class chararray(ndarray):
         Returns true for each element if all characters in the string are
         digits and there is at least one character, false otherwise.
 
-        See also
+        See Also
         --------
         char.isdigit
 
@@ -2288,7 +2405,7 @@ class chararray(ndarray):
         string are lowercase and there is at least one cased character,
         false otherwise.
 
-        See also
+        See Also
         --------
         char.islower
 
@@ -2301,7 +2418,7 @@ class chararray(ndarray):
         characters in the string and there is at least one character,
         false otherwise.
 
-        See also
+        See Also
         --------
         char.isspace
 
@@ -2313,7 +2430,7 @@ class chararray(ndarray):
         Returns true for each element if the element is a titlecased
         string and there is at least one character, false otherwise.
 
-        See also
+        See Also
         --------
         char.istitle
 
@@ -2326,7 +2443,7 @@ class chararray(ndarray):
         string are uppercase and there is at least one character, false
         otherwise.
 
-        See also
+        See Also
         --------
         char.isupper
 
@@ -2338,7 +2455,7 @@ class chararray(ndarray):
         Return a string which is the concatenation of the strings in the
         sequence `seq`.
 
-        See also
+        See Also
         --------
         char.join
 
@@ -2350,7 +2467,7 @@ class chararray(ndarray):
         Return an array with the elements of `self` left-justified in a
         string of length `width`.
 
-        See also
+        See Also
         --------
         char.ljust
 
@@ -2362,7 +2479,7 @@ class chararray(ndarray):
         Return an array with the elements of `self` converted to
         lowercase.
 
-        See also
+        See Also
         --------
         char.lower
 
@@ -2374,7 +2491,7 @@ class chararray(ndarray):
         For each element in `self`, return a copy with the leading characters
         removed.
 
-        See also
+        See Also
         --------
         char.lstrip
 
@@ -2385,7 +2502,7 @@ class chararray(ndarray):
         """
         Partition each element in `self` around `sep`.
 
-        See also
+        See Also
         --------
         partition
         """
@@ -2396,7 +2513,7 @@ class chararray(ndarray):
         For each element in `self`, return a copy of the string with all
         occurrences of substring `old` replaced by `new`.
 
-        See also
+        See Also
         --------
         char.replace
 
@@ -2409,7 +2526,7 @@ class chararray(ndarray):
         where substring `sub` is found, such that `sub` is contained
         within [`start`, `end`].
 
-        See also
+        See Also
         --------
         char.rfind
 
@@ -2421,7 +2538,7 @@ class chararray(ndarray):
         Like `rfind`, but raises `ValueError` when the substring `sub` is
         not found.
 
-        See also
+        See Also
         --------
         char.rindex
 
@@ -2433,7 +2550,7 @@ class chararray(ndarray):
         Return an array with the elements of `self`
         right-justified in a string of length `width`.
 
-        See also
+        See Also
         --------
         char.rjust
 
@@ -2444,7 +2561,7 @@ class chararray(ndarray):
         """
         Partition each element in `self` around `sep`.
 
-        See also
+        See Also
         --------
         rpartition
         """
@@ -2455,7 +2572,7 @@ class chararray(ndarray):
         For each element in `self`, return a list of the words in
         the string, using `sep` as the delimiter string.
 
-        See also
+        See Also
         --------
         char.rsplit
 
@@ -2467,7 +2584,7 @@ class chararray(ndarray):
         For each element in `self`, return a copy with the trailing
         characters removed.
 
-        See also
+        See Also
         --------
         char.rstrip
 
@@ -2479,7 +2596,7 @@ class chararray(ndarray):
         For each element in `self`, return a list of the words in the
         string, using `sep` as the delimiter string.
 
-        See also
+        See Also
         --------
         char.split
 
@@ -2491,7 +2608,7 @@ class chararray(ndarray):
         For each element in `self`, return a list of the lines in the
         element, breaking at line boundaries.
 
-        See also
+        See Also
         --------
         char.splitlines
 
@@ -2503,7 +2620,7 @@ class chararray(ndarray):
         Returns a boolean array which is `True` where the string element
         in `self` starts with `prefix`, otherwise `False`.
 
-        See also
+        See Also
         --------
         char.startswith
 
@@ -2515,7 +2632,7 @@ class chararray(ndarray):
         For each element in `self`, return a copy with the leading and
         trailing characters removed.
 
-        See also
+        See Also
         --------
         char.strip
 
@@ -2527,7 +2644,7 @@ class chararray(ndarray):
         For each element in `self`, return a copy of the string with
         uppercase characters converted to lowercase and vice versa.
 
-        See also
+        See Also
         --------
         char.swapcase
 
@@ -2540,7 +2657,7 @@ class chararray(ndarray):
         string: words start with uppercase characters, all remaining cased
         characters are lowercase.
 
-        See also
+        See Also
         --------
         char.title
 
@@ -2554,7 +2671,7 @@ class chararray(ndarray):
         `deletechars` are removed, and the remaining characters have
         been mapped through the given translation table.
 
-        See also
+        See Also
         --------
         char.translate
 
@@ -2566,7 +2683,7 @@ class chararray(ndarray):
         Return an array with the elements of `self` converted to
         uppercase.
 
-        See also
+        See Also
         --------
         char.upper
 
@@ -2578,7 +2695,7 @@ class chararray(ndarray):
         Return the numeric string left-filled with zeros in a string of
         length `width`.
 
-        See also
+        See Also
         --------
         char.zfill
 
@@ -2590,7 +2707,7 @@ class chararray(ndarray):
         For each element in `self`, return True if there are only
         numeric characters in the element.
 
-        See also
+        See Also
         --------
         char.isnumeric
 
@@ -2602,7 +2719,7 @@ class chararray(ndarray):
         For each element in `self`, return True if there are only
         decimal characters in the element.
 
-        See also
+        See Also
         --------
         char.isdecimal
 
@@ -2610,6 +2727,7 @@ class chararray(ndarray):
         return isdecimal(self)
 
 
+@set_module("numpy.char")
 def array(obj, itemsize=None, copy=True, unicode=None, order=None):
     """
     Create a `chararray`.
@@ -2617,7 +2735,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
     .. note::
        This class is provided for numarray backward-compatibility.
        New code (not concerned with numarray compatibility) should use
-       arrays of type `string_` or `unicode_` and use the free functions
+       arrays of type `bytes_` or `str_` and use the free functions
        in :mod:`numpy.char <numpy.core.defchararray>` for fast
        vectorized string operations instead.
 
@@ -2700,26 +2818,26 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
             # itemsize is in 8-bit chars, so for Unicode, we need
             # to divide by the size of a single Unicode character,
             # which for NumPy is always 4
-            if issubclass(obj.dtype.type, unicode_):
+            if issubclass(obj.dtype.type, str_):
                 itemsize //= 4
 
         if unicode is None:
-            if issubclass(obj.dtype.type, unicode_):
+            if issubclass(obj.dtype.type, str_):
                 unicode = True
             else:
                 unicode = False
 
         if unicode:
-            dtype = unicode_
+            dtype = str_
         else:
-            dtype = string_
+            dtype = bytes_
 
         if order is not None:
             obj = numpy.asarray(obj, order=order)
         if (copy or
                 (itemsize != obj.itemsize) or
-                (not unicode and isinstance(obj, unicode_)) or
-                (unicode and isinstance(obj, string_))):
+                (not unicode and isinstance(obj, str_)) or
+                (unicode and isinstance(obj, bytes_))):
             obj = obj.astype((dtype, int(itemsize)))
         return obj
 
@@ -2732,9 +2850,9 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
             # Fall through to the default case
 
     if unicode:
-        dtype = unicode_
+        dtype = str_
     else:
-        dtype = string_
+        dtype = bytes_
 
     if itemsize is None:
         val = narray(obj, dtype=dtype, order=order, subok=True)
@@ -2743,6 +2861,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
     return val.view(chararray)
 
 
+@set_module("numpy.char")
 def asarray(obj, itemsize=None, unicode=None, order=None):
     """
     Convert the input to a `chararray`, copying the data only if
author	scoder <stefan_ml@behnel.de>	2023-05-04 09:29:53 +0200
committer	GitHub <noreply@github.com>	2023-05-04 09:29:53 +0200
commit	442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
tree	d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/core/defchararray.py
parent	3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent	9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
download	numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz