summaryrefslogtreecommitdiff
path: root/numpy/core/defchararray.py
diff options
context:
space:
mode:
authorscoder <stefan_ml@behnel.de>2023-05-04 09:29:53 +0200
committerGitHub <noreply@github.com>2023-05-04 09:29:53 +0200
commit442c8f48d3146ec32c7d5387310e171276cf10ac (patch)
treed8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/core/defchararray.py
parent3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff)
parent9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff)
downloadnumpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz
Merge branch 'main' into cython3_noexcept
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r--numpy/core/defchararray.py489
1 files changed, 304 insertions, 185 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 1d447b86a..11c5a30bf 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -6,7 +6,7 @@ operations and methods.
The `chararray` class exists for backwards compatibility with
Numarray, it is not recommended for new development. Starting from numpy
1.4, if one needs arrays of strings, it is recommended to use arrays of
- `dtype` `object_`, `string_` or `unicode_`, and use the free functions
+ `dtype` `object_`, `bytes_` or `str_`, and use the free functions
in the `numpy.char` module for fast vectorized string operations.
Some methods will only be available if the corresponding string method is
@@ -16,13 +16,13 @@ The preferred alias for `defchararray` is `numpy.char`.
"""
import functools
-import sys
+
+from .._utils import set_module
from .numerictypes import (
- string_, unicode_, integer, int_, object_, bool_, character)
+ bytes_, str_, integer, int_, object_, bool_, character)
from .numeric import ndarray, compare_chararrays
from .numeric import array as narray
from numpy.core.multiarray import _vec_string
-from numpy.core.overrides import set_module
from numpy.core import overrides
from numpy.compat import asbytes
import numpy
@@ -46,26 +46,29 @@ array_function_dispatch = functools.partial(
overrides.array_function_dispatch, module='numpy.char')
-def _use_unicode(*args):
- """
- Helper function for determining the output type of some string
- operations.
+def _is_unicode(arr):
+ """Returns True if arr is a string or a string array with a dtype that
+ represents a unicode string, otherwise returns False.
- For an operation on two ndarrays, if at least one is unicode, the
- result should be unicode.
"""
- for x in args:
- if (isinstance(x, str) or
- issubclass(numpy.asarray(x).dtype.type, unicode_)):
- return unicode_
- return string_
+ if (isinstance(arr, str) or
+ issubclass(numpy.asarray(arr).dtype.type, str)):
+ return True
+ return False
+
-def _to_string_or_unicode_array(result):
+def _to_bytes_or_str_array(result, output_dtype_like=None):
"""
- Helper function to cast a result back into a string or unicode array
- if an object array must be used as an intermediary.
+ Helper function to cast a result back into an array
+ with the appropriate dtype if an object array must be used
+ as an intermediary.
"""
- return numpy.asarray(result.tolist())
+ ret = numpy.asarray(result.tolist())
+ dtype = getattr(output_dtype_like, 'dtype', None)
+ if dtype is not None:
+ return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False)
+ return ret
+
def _clean_args(*args):
"""
@@ -89,7 +92,7 @@ def _get_num_chars(a):
a string or unicode array. This is to abstract out the fact that
for a unicode array this is itemsize / 4.
"""
- if issubclass(a.dtype.type, unicode_):
+ if issubclass(a.dtype.type, str_):
return a.itemsize // 4
return a.itemsize
@@ -114,8 +117,8 @@ def equal(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -140,8 +143,8 @@ def not_equal(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -167,8 +170,8 @@ def greater_equal(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -193,8 +196,8 @@ def less_equal(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -219,8 +222,8 @@ def greater(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -245,8 +248,8 @@ def less(x1, x2):
Returns
-------
- out : ndarray or bool
- Output array of bools, or a single bool if x1 and x2 are scalars.
+ out : ndarray
+ Output array of bools.
See Also
--------
@@ -273,9 +276,21 @@ def str_len(a):
out : ndarray
Output array of integers
- See also
+ See Also
+ --------
+ len
+
+ Examples
--------
- builtins.len
+ >>> a = np.array(['Grace Hopper Conference', 'Open Source Day'])
+ >>> np.char.str_len(a)
+ array([23, 15])
+ >>> a = np.array([u'\u0420', u'\u043e'])
+ >>> np.char.str_len(a)
+ array([1, 1])
+ >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']])
+ >>> np.char.str_len(a)
+ array([[5, 5], [1, 1]])
"""
# Note: __len__, etc. currently return ints, which are not C-integers.
# Generally intp would be expected for lengths, although int is sufficient
@@ -300,16 +315,26 @@ def add(x1, x2):
Returns
-------
add : ndarray
- Output array of `string_` or `unicode_`, depending on input types
+ Output array of `bytes_` or `str_`, depending on input types
of the same shape as `x1` and `x2`.
"""
arr1 = numpy.asarray(x1)
arr2 = numpy.asarray(x2)
out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
- dtype = _use_unicode(arr1, arr2)
- return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
+ if type(arr1.dtype) != type(arr2.dtype):
+ # Enforce this for now. The solution to it will be implement add
+ # as a ufunc. It never worked right on Python 3: bytes + unicode gave
+ # nonsense unicode + bytes errored, and unicode + object used the
+ # object dtype itemsize as num chars (worked on short strings).
+ # bytes + void worked but promoting void->bytes is dubious also.
+ raise TypeError(
+ "np.char.add() requires both arrays of the same dtype kind, but "
+ f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases "
+ "where this used to work often lead to incorrect results).")
+
+ return _vec_string(arr1, type(arr1.dtype)(out_size), '__add__', (arr2,))
def _multiply_dispatcher(a, i):
return (a,)
@@ -334,7 +359,24 @@ def multiply(a, i):
-------
out : ndarray
Output array of str or unicode, depending on input types
-
+
+ Examples
+ --------
+ >>> a = np.array(["a", "b", "c"])
+ >>> np.char.multiply(x, 3)
+ array(['aaa', 'bbb', 'ccc'], dtype='<U3')
+ >>> i = np.array([1, 2, 3])
+ >>> np.char.multiply(a, i)
+ array(['a', 'bb', 'ccc'], dtype='<U3')
+ >>> np.char.multiply(np.array(['a']), i)
+ array(['a', 'aa', 'aaa'], dtype='<U3')
+ >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3))
+ >>> np.char.multiply(a, 3)
+ array([['aaa', 'bbb', 'ccc'],
+ ['ddd', 'eee', 'fff']], dtype='<U3')
+ >>> np.char.multiply(a, i)
+ array([['a', 'bb', 'ccc'],
+ ['d', 'ee', 'fff']], dtype='<U3')
"""
a_arr = numpy.asarray(a)
i_arr = numpy.asarray(i)
@@ -342,7 +384,7 @@ def multiply(a, i):
raise ValueError("Can only multiply by integers")
out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0)
return _vec_string(
- a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
+ a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,))
def _mod_dispatcher(a, values):
@@ -368,13 +410,13 @@ def mod(a, values):
out : ndarray
Output array of str or unicode, depending on input types
- See also
+ See Also
--------
str.__mod__
"""
- return _to_string_or_unicode_array(
- _vec_string(a, object_, '__mod__', (values,)))
+ return _to_bytes_or_str_array(
+ _vec_string(a, object_, '__mod__', (values,)), a)
@array_function_dispatch(_unary_op_dispatcher)
@@ -398,7 +440,7 @@ def capitalize(a):
Output array of str or unicode, depending on input
types
- See also
+ See Also
--------
str.capitalize
@@ -443,18 +485,34 @@ def center(a, width, fillchar=' '):
Output array of str or unicode, depending on input
types
- See also
+ See Also
--------
str.center
+
+ Notes
+ -----
+ This function is intended to work with arrays of strings. The
+ fill character is not applied to numeric types.
+
+ Examples
+ --------
+ >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c
+ array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4')
+ >>> np.char.center(c, width=9)
+ array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9')
+ >>> np.char.center(c, width=9, fillchar='*')
+ array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9')
+ >>> np.char.center(c, width=1)
+ array(['a', '1', 'b', '2'], dtype='<U1')
"""
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = int(numpy.max(width_arr.flat))
- if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
fillchar = asbytes(fillchar)
return _vec_string(
- a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+ a_arr, type(a_arr.dtype)(size), 'center', (width_arr, fillchar))
def _count_dispatcher(a, sub, start=None, end=None):
@@ -485,7 +543,7 @@ def count(a, sub, start=0, end=None):
out : ndarray
Output array of ints.
- See also
+ See Also
--------
str.count
@@ -513,8 +571,8 @@ def _code_dispatcher(a, encoding=None, errors=None):
@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
- """
- Calls `str.decode` element-wise.
+ r"""
+ Calls ``bytes.decode`` element-wise.
The set of available codecs comes from the Python standard library,
and may be extended at runtime. For more information, see the
@@ -534,9 +592,9 @@ def decode(a, encoding=None, errors=None):
-------
out : ndarray
- See also
+ See Also
--------
- str.decode
+ :py:meth:`bytes.decode`
Notes
-----
@@ -544,16 +602,16 @@ def decode(a, encoding=None, errors=None):
Examples
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
+ ... b'\x81\x82\xc2\xc1\xc2\x82\x81'])
>>> c
+ array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@',
+ ... b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7')
+ >>> np.char.decode(c, encoding='cp037')
array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
- >>> np.char.encode(c, encoding='cp037')
- array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
- '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
- dtype='|S7')
"""
- return _to_string_or_unicode_array(
+ return _to_bytes_or_str_array(
_vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
@@ -580,7 +638,7 @@ def encode(a, encoding=None, errors=None):
-------
out : ndarray
- See also
+ See Also
--------
str.encode
@@ -589,7 +647,7 @@ def encode(a, encoding=None, errors=None):
The type of the result will depend on the encoding specified.
"""
- return _to_string_or_unicode_array(
+ return _to_bytes_or_str_array(
_vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
@@ -620,7 +678,7 @@ def endswith(a, suffix, start=0, end=None):
out : ndarray
Outputs an array of bools.
- See also
+ See Also
--------
str.endswith
@@ -672,13 +730,13 @@ def expandtabs(a, tabsize=8):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.expandtabs
"""
- return _to_string_or_unicode_array(
- _vec_string(a, object_, 'expandtabs', (tabsize,)))
+ return _to_bytes_or_str_array(
+ _vec_string(a, object_, 'expandtabs', (tabsize,)), a)
@array_function_dispatch(_count_dispatcher)
@@ -708,10 +766,16 @@ def find(a, sub, start=0, end=None):
out : ndarray or int
Output array of ints. Returns -1 if `sub` is not found.
- See also
+ See Also
--------
str.find
+ Examples
+ --------
+ >>> a = np.array(["NumPy is a Python library"])
+ >>> np.char.find(a, "Python", start=0, end=None)
+ array([11])
+
"""
return _vec_string(
a, int_, 'find', [sub, start] + _clean_args(end))
@@ -737,10 +801,16 @@ def index(a, sub, start=0, end=None):
out : ndarray
Output array of ints. Returns -1 if `sub` is not found.
- See also
+ See Also
--------
find, str.find
+ Examples
+ --------
+ >>> a = np.array(["Computer Science"])
+ >>> np.char.index(a, "Science", start=0, end=None)
+ array([9])
+
"""
return _vec_string(
a, int_, 'index', [sub, start] + _clean_args(end))
@@ -765,7 +835,7 @@ def isalnum(a):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.isalnum
"""
@@ -791,7 +861,7 @@ def isalpha(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.isalpha
"""
@@ -817,9 +887,18 @@ def isdigit(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.isdigit
+
+ Examples
+ --------
+ >>> a = np.array(['a', 'b', '0'])
+ >>> np.char.isdigit(a)
+ array([False, False, True])
+ >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']])
+ >>> np.char.isdigit(a)
+ array([[False, False, True], [False, True, True]])
"""
return _vec_string(a, bool_, 'isdigit')
@@ -844,7 +923,7 @@ def islower(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.islower
"""
@@ -871,7 +950,7 @@ def isspace(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.isspace
"""
@@ -897,7 +976,7 @@ def istitle(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.istitle
"""
@@ -907,7 +986,7 @@ def istitle(a):
@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
"""
- Returns true for each element if all cased characters in the
+ Return true for each element if all cased characters in the
string are uppercase and there is at least one character, false
otherwise.
@@ -924,9 +1003,19 @@ def isupper(a):
out : ndarray
Output array of bools
- See also
+ See Also
--------
str.isupper
+
+ Examples
+ --------
+ >>> str = "GHC"
+ >>> np.char.isupper(str)
+ array(True)
+ >>> a = np.array(["hello", "HELLO", "Hello"])
+ >>> np.char.isupper(a)
+ array([False, True, False])
+
"""
return _vec_string(a, bool_, 'isupper')
@@ -953,12 +1042,21 @@ def join(sep, seq):
out : ndarray
Output array of str or unicode, depending on input types
- See also
+ See Also
--------
str.join
+
+ Examples
+ --------
+ >>> np.char.join('-', 'osd')
+ array('o-s-d', dtype='<U5')
+
+ >>> np.char.join(['-', '.'], ['ghc', 'osd'])
+ array(['g-h-c', 'o.s.d'], dtype='<U5')
+
"""
- return _to_string_or_unicode_array(
- _vec_string(sep, object_, 'join', (seq,)))
+ return _to_bytes_or_str_array(
+ _vec_string(sep, object_, 'join', (seq,)), seq)
@@ -988,7 +1086,7 @@ def ljust(a, width, fillchar=' '):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.ljust
@@ -996,10 +1094,10 @@ def ljust(a, width, fillchar=' '):
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = int(numpy.max(width_arr.flat))
- if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
fillchar = asbytes(fillchar)
return _vec_string(
- a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+ a_arr, type(a_arr.dtype)(size), 'ljust', (width_arr, fillchar))
@array_function_dispatch(_unary_op_dispatcher)
@@ -1021,7 +1119,7 @@ def lower(a):
out : ndarray, {str, unicode}
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.lower
@@ -1066,7 +1164,7 @@ def lstrip(a, chars=None):
out : ndarray, {str, unicode}
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.lstrip
@@ -1127,13 +1225,13 @@ def partition(a, sep):
The output array will have an extra dimension with 3
elements per input element.
- See also
+ See Also
--------
str.partition
"""
- return _to_string_or_unicode_array(
- _vec_string(a, object_, 'partition', (sep,)))
+ return _to_bytes_or_str_array(
+ _vec_string(a, object_, 'partition', (sep,)), a)
def _replace_dispatcher(a, old, new, count=None):
@@ -1163,14 +1261,22 @@ def replace(a, old, new, count=None):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.replace
+
+ Examples
+ --------
+ >>> a = np.array(["That is a mango", "Monkeys eat mangos"])
+ >>> np.char.replace(a, 'mango', 'banana')
+ array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19')
+ >>> a = np.array(["The dish is fresh", "This is it"])
+ >>> np.char.replace(a, 'is', 'was')
+ array(['The dwash was fresh', 'Thwas was it'], dtype='<U19')
"""
- return _to_string_or_unicode_array(
- _vec_string(
- a, object_, 'replace', [old, new] + _clean_args(count)))
+ return _to_bytes_or_str_array(
+ _vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a)
@array_function_dispatch(_count_dispatcher)
@@ -1197,7 +1303,7 @@ def rfind(a, sub, start=0, end=None):
out : ndarray
Output array of ints. Return -1 on failure.
- See also
+ See Also
--------
str.rfind
@@ -1227,7 +1333,7 @@ def rindex(a, sub, start=0, end=None):
out : ndarray
Output array of ints.
- See also
+ See Also
--------
rfind, str.rindex
@@ -1258,7 +1364,7 @@ def rjust(a, width, fillchar=' '):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.rjust
@@ -1266,10 +1372,10 @@ def rjust(a, width, fillchar=' '):
a_arr = numpy.asarray(a)
width_arr = numpy.asarray(width)
size = int(numpy.max(width_arr.flat))
- if numpy.issubdtype(a_arr.dtype, numpy.string_):
+ if numpy.issubdtype(a_arr.dtype, numpy.bytes_):
fillchar = asbytes(fillchar)
return _vec_string(
- a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
+ a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar))
@array_function_dispatch(_partition_dispatcher)
@@ -1299,13 +1405,13 @@ def rpartition(a, sep):
type. The output array will have an extra dimension with
3 elements per input element.
- See also
+ See Also
--------
str.rpartition
"""
- return _to_string_or_unicode_array(
- _vec_string(a, object_, 'rpartition', (sep,)))
+ return _to_bytes_or_str_array(
+ _vec_string(a, object_, 'rpartition', (sep,)), a)
def _split_dispatcher(a, sep=None, maxsplit=None):
@@ -1339,7 +1445,7 @@ def rsplit(a, sep=None, maxsplit=None):
out : ndarray
Array of list objects
- See also
+ See Also
--------
str.rsplit, split
@@ -1378,7 +1484,7 @@ def rstrip(a, chars=None):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.rstrip
@@ -1423,7 +1529,7 @@ def split(a, sep=None, maxsplit=None):
out : ndarray
Array of list objects
- See also
+ See Also
--------
str.split, rsplit
@@ -1459,7 +1565,7 @@ def splitlines(a, keepends=None):
out : ndarray
Array of list objects
- See also
+ See Also
--------
str.splitlines
@@ -1495,7 +1601,7 @@ def startswith(a, prefix, start=0, end=None):
out : ndarray
Array of booleans
- See also
+ See Also
--------
str.startswith
@@ -1528,7 +1634,7 @@ def strip(a, chars=None):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.strip
@@ -1569,7 +1675,7 @@ def swapcase(a):
out : ndarray, {str, unicode}
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.swapcase
@@ -1609,7 +1715,7 @@ def title(a):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.title
@@ -1654,13 +1760,13 @@ def translate(a, table, deletechars=None):
out : ndarray
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.translate
"""
a_arr = numpy.asarray(a)
- if issubclass(a_arr.dtype.type, unicode_):
+ if issubclass(a_arr.dtype.type, str_):
return _vec_string(
a_arr, a_arr.dtype, 'translate', (table,))
else:
@@ -1687,7 +1793,7 @@ def upper(a):
out : ndarray, {str, unicode}
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.upper
@@ -1726,7 +1832,7 @@ def zfill(a, width):
out : ndarray, {str, unicode}
Output array of str or unicode, depending on input type
- See also
+ See Also
--------
str.zfill
@@ -1735,7 +1841,7 @@ def zfill(a, width):
width_arr = numpy.asarray(width)
size = int(numpy.max(width_arr.flat))
return _vec_string(
- a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+ a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,))
@array_function_dispatch(_unary_op_dispatcher)
@@ -1744,7 +1850,7 @@ def isnumeric(a):
For each element, return True if there are only numeric
characters in the element.
- Calls `unicode.isnumeric` element-wise.
+ Calls `str.isnumeric` element-wise.
Numeric characters include digit characters, and all characters
that have the Unicode numeric value property, e.g. ``U+2155,
@@ -1760,12 +1866,17 @@ def isnumeric(a):
out : ndarray, bool
Array of booleans of same shape as `a`.
- See also
+ See Also
+ --------
+ str.isnumeric
+
+ Examples
--------
- unicode.isnumeric
+ >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII'])
+ array([ True, False, False, False, False])
"""
- if _use_unicode(a) != unicode_:
+ if not _is_unicode(a):
raise TypeError("isnumeric is only available for Unicode strings and arrays")
return _vec_string(a, bool_, 'isnumeric')
@@ -1776,7 +1887,7 @@ def isdecimal(a):
For each element, return True if there are only decimal
characters in the element.
- Calls `unicode.isdecimal` element-wise.
+ Calls `str.isdecimal` element-wise.
Decimal characters include digit characters, and all characters
that can be used to form decimal-radix numbers,
@@ -1792,13 +1903,19 @@ def isdecimal(a):
out : ndarray, bool
Array of booleans identical in shape to `a`.
- See also
+ See Also
--------
- unicode.isdecimal
+ str.isdecimal
- """
- if _use_unicode(a) != unicode_:
- raise TypeError("isnumeric is only available for Unicode strings and arrays")
+ Examples
+ --------
+ >>> np.char.isdecimal(['12345', '4.99', '123ABC', ''])
+ array([ True, False, False, False])
+
+ """
+ if not _is_unicode(a):
+ raise TypeError(
+ "isdecimal is only available for Unicode strings and arrays")
return _vec_string(a, bool_, 'isdecimal')
@@ -1814,7 +1931,7 @@ class chararray(ndarray):
The `chararray` class exists for backwards compatibility with
Numarray, it is not recommended for new development. Starting from numpy
1.4, if one needs arrays of strings, it is recommended to use arrays of
- `dtype` `object_`, `string_` or `unicode_`, and use the free functions
+ `dtype` `object_`, `bytes_` or `str_`, and use the free functions
in the `numpy.char` module for fast vectorized string operations.
Versus a regular NumPy array of type `str` or `unicode`, this
@@ -1948,9 +2065,9 @@ class chararray(ndarray):
global _globalvar
if unicode:
- dtype = unicode_
+ dtype = str_
else:
- dtype = string_
+ dtype = bytes_
# force itemsize to be a Python int, since using NumPy integer
# types results in itemsize.itemsize being used as the size of
@@ -2004,7 +2121,7 @@ class chararray(ndarray):
"""
Return (self == other) element-wise.
- See also
+ See Also
--------
equal
"""
@@ -2014,7 +2131,7 @@ class chararray(ndarray):
"""
Return (self != other) element-wise.
- See also
+ See Also
--------
not_equal
"""
@@ -2024,7 +2141,7 @@ class chararray(ndarray):
"""
Return (self >= other) element-wise.
- See also
+ See Also
--------
greater_equal
"""
@@ -2034,7 +2151,7 @@ class chararray(ndarray):
"""
Return (self <= other) element-wise.
- See also
+ See Also
--------
less_equal
"""
@@ -2044,7 +2161,7 @@ class chararray(ndarray):
"""
Return (self > other) element-wise.
- See also
+ See Also
--------
greater
"""
@@ -2054,7 +2171,7 @@ class chararray(ndarray):
"""
Return (self < other) element-wise.
- See also
+ See Also
--------
less
"""
@@ -2065,7 +2182,7 @@ class chararray(ndarray):
Return (self + other), that is string concatenation,
element-wise for a pair of array_likes of str or unicode.
- See also
+ See Also
--------
add
"""
@@ -2074,9 +2191,9 @@ class chararray(ndarray):
def __radd__(self, other):
"""
Return (other + self), that is string concatenation,
- element-wise for a pair of array_likes of `string_` or `unicode_`.
+ element-wise for a pair of array_likes of `bytes_` or `str_`.
- See also
+ See Also
--------
add
"""
@@ -2087,7 +2204,7 @@ class chararray(ndarray):
Return (self * i), that is string multiple concatenation,
element-wise.
- See also
+ See Also
--------
multiply
"""
@@ -2098,7 +2215,7 @@ class chararray(ndarray):
Return (self * i), that is string multiple concatenation,
element-wise.
- See also
+ See Also
--------
multiply
"""
@@ -2107,10 +2224,10 @@ class chararray(ndarray):
def __mod__(self, i):
"""
Return (self % i), that is pre-Python 2.6 string formatting
- (interpolation), element-wise for a pair of array_likes of `string_`
- or `unicode_`.
+ (interpolation), element-wise for a pair of array_likes of `bytes_`
+ or `str_`.
- See also
+ See Also
--------
mod
"""
@@ -2145,7 +2262,7 @@ class chararray(ndarray):
Return a copy of `self` with only the first character of each element
capitalized.
- See also
+ See Also
--------
char.capitalize
@@ -2157,7 +2274,7 @@ class chararray(ndarray):
Return a copy of `self` with its elements centered in a
string of length `width`.
- See also
+ See Also
--------
center
"""
@@ -2168,7 +2285,7 @@ class chararray(ndarray):
Returns an array with the number of non-overlapping occurrences of
substring `sub` in the range [`start`, `end`].
- See also
+ See Also
--------
char.count
@@ -2177,9 +2294,9 @@ class chararray(ndarray):
def decode(self, encoding=None, errors=None):
"""
- Calls `str.decode` element-wise.
+ Calls ``bytes.decode`` element-wise.
- See also
+ See Also
--------
char.decode
@@ -2190,7 +2307,7 @@ class chararray(ndarray):
"""
Calls `str.encode` element-wise.
- See also
+ See Also
--------
char.encode
@@ -2202,7 +2319,7 @@ class chararray(ndarray):
Returns a boolean array which is `True` where the string element
in `self` ends with `suffix`, otherwise `False`.
- See also
+ See Also
--------
char.endswith
@@ -2214,7 +2331,7 @@ class chararray(ndarray):
Return a copy of each string element where all tab characters are
replaced by one or more spaces.
- See also
+ See Also
--------
char.expandtabs
@@ -2226,7 +2343,7 @@ class chararray(ndarray):
For each element, return the lowest index in the string where
substring `sub` is found.
- See also
+ See Also
--------
char.find
@@ -2237,7 +2354,7 @@ class chararray(ndarray):
"""
Like `find`, but raises `ValueError` when the substring is not found.
- See also
+ See Also
--------
char.index
@@ -2250,7 +2367,7 @@ class chararray(ndarray):
are alphanumeric and there is at least one character, false
otherwise.
- See also
+ See Also
--------
char.isalnum
@@ -2263,7 +2380,7 @@ class chararray(ndarray):
are alphabetic and there is at least one character, false
otherwise.
- See also
+ See Also
--------
char.isalpha
@@ -2275,7 +2392,7 @@ class chararray(ndarray):
Returns true for each element if all characters in the string are
digits and there is at least one character, false otherwise.
- See also
+ See Also
--------
char.isdigit
@@ -2288,7 +2405,7 @@ class chararray(ndarray):
string are lowercase and there is at least one cased character,
false otherwise.
- See also
+ See Also
--------
char.islower
@@ -2301,7 +2418,7 @@ class chararray(ndarray):
characters in the string and there is at least one character,
false otherwise.
- See also
+ See Also
--------
char.isspace
@@ -2313,7 +2430,7 @@ class chararray(ndarray):
Returns true for each element if the element is a titlecased
string and there is at least one character, false otherwise.
- See also
+ See Also
--------
char.istitle
@@ -2326,7 +2443,7 @@ class chararray(ndarray):
string are uppercase and there is at least one character, false
otherwise.
- See also
+ See Also
--------
char.isupper
@@ -2338,7 +2455,7 @@ class chararray(ndarray):
Return a string which is the concatenation of the strings in the
sequence `seq`.
- See also
+ See Also
--------
char.join
@@ -2350,7 +2467,7 @@ class chararray(ndarray):
Return an array with the elements of `self` left-justified in a
string of length `width`.
- See also
+ See Also
--------
char.ljust
@@ -2362,7 +2479,7 @@ class chararray(ndarray):
Return an array with the elements of `self` converted to
lowercase.
- See also
+ See Also
--------
char.lower
@@ -2374,7 +2491,7 @@ class chararray(ndarray):
For each element in `self`, return a copy with the leading characters
removed.
- See also
+ See Also
--------
char.lstrip
@@ -2385,7 +2502,7 @@ class chararray(ndarray):
"""
Partition each element in `self` around `sep`.
- See also
+ See Also
--------
partition
"""
@@ -2396,7 +2513,7 @@ class chararray(ndarray):
For each element in `self`, return a copy of the string with all
occurrences of substring `old` replaced by `new`.
- See also
+ See Also
--------
char.replace
@@ -2409,7 +2526,7 @@ class chararray(ndarray):
where substring `sub` is found, such that `sub` is contained
within [`start`, `end`].
- See also
+ See Also
--------
char.rfind
@@ -2421,7 +2538,7 @@ class chararray(ndarray):
Like `rfind`, but raises `ValueError` when the substring `sub` is
not found.
- See also
+ See Also
--------
char.rindex
@@ -2433,7 +2550,7 @@ class chararray(ndarray):
Return an array with the elements of `self`
right-justified in a string of length `width`.
- See also
+ See Also
--------
char.rjust
@@ -2444,7 +2561,7 @@ class chararray(ndarray):
"""
Partition each element in `self` around `sep`.
- See also
+ See Also
--------
rpartition
"""
@@ -2455,7 +2572,7 @@ class chararray(ndarray):
For each element in `self`, return a list of the words in
the string, using `sep` as the delimiter string.
- See also
+ See Also
--------
char.rsplit
@@ -2467,7 +2584,7 @@ class chararray(ndarray):
For each element in `self`, return a copy with the trailing
characters removed.
- See also
+ See Also
--------
char.rstrip
@@ -2479,7 +2596,7 @@ class chararray(ndarray):
For each element in `self`, return a list of the words in the
string, using `sep` as the delimiter string.
- See also
+ See Also
--------
char.split
@@ -2491,7 +2608,7 @@ class chararray(ndarray):
For each element in `self`, return a list of the lines in the
element, breaking at line boundaries.
- See also
+ See Also
--------
char.splitlines
@@ -2503,7 +2620,7 @@ class chararray(ndarray):
Returns a boolean array which is `True` where the string element
in `self` starts with `prefix`, otherwise `False`.
- See also
+ See Also
--------
char.startswith
@@ -2515,7 +2632,7 @@ class chararray(ndarray):
For each element in `self`, return a copy with the leading and
trailing characters removed.
- See also
+ See Also
--------
char.strip
@@ -2527,7 +2644,7 @@ class chararray(ndarray):
For each element in `self`, return a copy of the string with
uppercase characters converted to lowercase and vice versa.
- See also
+ See Also
--------
char.swapcase
@@ -2540,7 +2657,7 @@ class chararray(ndarray):
string: words start with uppercase characters, all remaining cased
characters are lowercase.
- See also
+ See Also
--------
char.title
@@ -2554,7 +2671,7 @@ class chararray(ndarray):
`deletechars` are removed, and the remaining characters have
been mapped through the given translation table.
- See also
+ See Also
--------
char.translate
@@ -2566,7 +2683,7 @@ class chararray(ndarray):
Return an array with the elements of `self` converted to
uppercase.
- See also
+ See Also
--------
char.upper
@@ -2578,7 +2695,7 @@ class chararray(ndarray):
Return the numeric string left-filled with zeros in a string of
length `width`.
- See also
+ See Also
--------
char.zfill
@@ -2590,7 +2707,7 @@ class chararray(ndarray):
For each element in `self`, return True if there are only
numeric characters in the element.
- See also
+ See Also
--------
char.isnumeric
@@ -2602,7 +2719,7 @@ class chararray(ndarray):
For each element in `self`, return True if there are only
decimal characters in the element.
- See also
+ See Also
--------
char.isdecimal
@@ -2610,6 +2727,7 @@ class chararray(ndarray):
return isdecimal(self)
+@set_module("numpy.char")
def array(obj, itemsize=None, copy=True, unicode=None, order=None):
"""
Create a `chararray`.
@@ -2617,7 +2735,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
.. note::
This class is provided for numarray backward-compatibility.
New code (not concerned with numarray compatibility) should use
- arrays of type `string_` or `unicode_` and use the free functions
+ arrays of type `bytes_` or `str_` and use the free functions
in :mod:`numpy.char <numpy.core.defchararray>` for fast
vectorized string operations instead.
@@ -2700,26 +2818,26 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
# itemsize is in 8-bit chars, so for Unicode, we need
# to divide by the size of a single Unicode character,
# which for NumPy is always 4
- if issubclass(obj.dtype.type, unicode_):
+ if issubclass(obj.dtype.type, str_):
itemsize //= 4
if unicode is None:
- if issubclass(obj.dtype.type, unicode_):
+ if issubclass(obj.dtype.type, str_):
unicode = True
else:
unicode = False
if unicode:
- dtype = unicode_
+ dtype = str_
else:
- dtype = string_
+ dtype = bytes_
if order is not None:
obj = numpy.asarray(obj, order=order)
if (copy or
(itemsize != obj.itemsize) or
- (not unicode and isinstance(obj, unicode_)) or
- (unicode and isinstance(obj, string_))):
+ (not unicode and isinstance(obj, str_)) or
+ (unicode and isinstance(obj, bytes_))):
obj = obj.astype((dtype, int(itemsize)))
return obj
@@ -2732,9 +2850,9 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
# Fall through to the default case
if unicode:
- dtype = unicode_
+ dtype = str_
else:
- dtype = string_
+ dtype = bytes_
if itemsize is None:
val = narray(obj, dtype=dtype, order=order, subok=True)
@@ -2743,6 +2861,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None):
return val.view(chararray)
+@set_module("numpy.char")
def asarray(obj, itemsize=None, unicode=None, order=None):
"""
Convert the input to a `chararray`, copying the data only if