diff options
author | scoder <stefan_ml@behnel.de> | 2023-05-04 09:29:53 +0200 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-04 09:29:53 +0200 |
commit | 442c8f48d3146ec32c7d5387310e171276cf10ac (patch) | |
tree | d8911d1a64e384b7955d3fc09a07edd218a9f1ee /numpy/core/defchararray.py | |
parent | 3e4a6cba2da27bbe2a6e12c163238e503c9f6a07 (diff) | |
parent | 9163e933df91b516b6f0c7a9ba8ad1750e642f37 (diff) | |
download | numpy-442c8f48d3146ec32c7d5387310e171276cf10ac.tar.gz |
Merge branch 'main' into cython3_noexcept
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r-- | numpy/core/defchararray.py | 489 |
1 files changed, 304 insertions, 185 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 1d447b86a..11c5a30bf 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -6,7 +6,7 @@ operations and methods. The `chararray` class exists for backwards compatibility with Numarray, it is not recommended for new development. Starting from numpy 1.4, if one needs arrays of strings, it is recommended to use arrays of - `dtype` `object_`, `string_` or `unicode_`, and use the free functions + `dtype` `object_`, `bytes_` or `str_`, and use the free functions in the `numpy.char` module for fast vectorized string operations. Some methods will only be available if the corresponding string method is @@ -16,13 +16,13 @@ The preferred alias for `defchararray` is `numpy.char`. """ import functools -import sys + +from .._utils import set_module from .numerictypes import ( - string_, unicode_, integer, int_, object_, bool_, character) + bytes_, str_, integer, int_, object_, bool_, character) from .numeric import ndarray, compare_chararrays from .numeric import array as narray from numpy.core.multiarray import _vec_string -from numpy.core.overrides import set_module from numpy.core import overrides from numpy.compat import asbytes import numpy @@ -46,26 +46,29 @@ array_function_dispatch = functools.partial( overrides.array_function_dispatch, module='numpy.char') -def _use_unicode(*args): - """ - Helper function for determining the output type of some string - operations. +def _is_unicode(arr): + """Returns True if arr is a string or a string array with a dtype that + represents a unicode string, otherwise returns False. - For an operation on two ndarrays, if at least one is unicode, the - result should be unicode. """ - for x in args: - if (isinstance(x, str) or - issubclass(numpy.asarray(x).dtype.type, unicode_)): - return unicode_ - return string_ + if (isinstance(arr, str) or + issubclass(numpy.asarray(arr).dtype.type, str)): + return True + return False + -def _to_string_or_unicode_array(result): +def _to_bytes_or_str_array(result, output_dtype_like=None): """ - Helper function to cast a result back into a string or unicode array - if an object array must be used as an intermediary. + Helper function to cast a result back into an array + with the appropriate dtype if an object array must be used + as an intermediary. """ - return numpy.asarray(result.tolist()) + ret = numpy.asarray(result.tolist()) + dtype = getattr(output_dtype_like, 'dtype', None) + if dtype is not None: + return ret.astype(type(dtype)(_get_num_chars(ret)), copy=False) + return ret + def _clean_args(*args): """ @@ -89,7 +92,7 @@ def _get_num_chars(a): a string or unicode array. This is to abstract out the fact that for a unicode array this is itemsize / 4. """ - if issubclass(a.dtype.type, unicode_): + if issubclass(a.dtype.type, str_): return a.itemsize // 4 return a.itemsize @@ -114,8 +117,8 @@ def equal(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -140,8 +143,8 @@ def not_equal(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -167,8 +170,8 @@ def greater_equal(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -193,8 +196,8 @@ def less_equal(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -219,8 +222,8 @@ def greater(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -245,8 +248,8 @@ def less(x1, x2): Returns ------- - out : ndarray or bool - Output array of bools, or a single bool if x1 and x2 are scalars. + out : ndarray + Output array of bools. See Also -------- @@ -273,9 +276,21 @@ def str_len(a): out : ndarray Output array of integers - See also + See Also + -------- + len + + Examples -------- - builtins.len + >>> a = np.array(['Grace Hopper Conference', 'Open Source Day']) + >>> np.char.str_len(a) + array([23, 15]) + >>> a = np.array([u'\u0420', u'\u043e']) + >>> np.char.str_len(a) + array([1, 1]) + >>> a = np.array([['hello', 'world'], [u'\u0420', u'\u043e']]) + >>> np.char.str_len(a) + array([[5, 5], [1, 1]]) """ # Note: __len__, etc. currently return ints, which are not C-integers. # Generally intp would be expected for lengths, although int is sufficient @@ -300,16 +315,26 @@ def add(x1, x2): Returns ------- add : ndarray - Output array of `string_` or `unicode_`, depending on input types + Output array of `bytes_` or `str_`, depending on input types of the same shape as `x1` and `x2`. """ arr1 = numpy.asarray(x1) arr2 = numpy.asarray(x2) out_size = _get_num_chars(arr1) + _get_num_chars(arr2) - dtype = _use_unicode(arr1, arr2) - return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,)) + if type(arr1.dtype) != type(arr2.dtype): + # Enforce this for now. The solution to it will be implement add + # as a ufunc. It never worked right on Python 3: bytes + unicode gave + # nonsense unicode + bytes errored, and unicode + object used the + # object dtype itemsize as num chars (worked on short strings). + # bytes + void worked but promoting void->bytes is dubious also. + raise TypeError( + "np.char.add() requires both arrays of the same dtype kind, but " + f"got dtypes: '{arr1.dtype}' and '{arr2.dtype}' (the few cases " + "where this used to work often lead to incorrect results).") + + return _vec_string(arr1, type(arr1.dtype)(out_size), '__add__', (arr2,)) def _multiply_dispatcher(a, i): return (a,) @@ -334,7 +359,24 @@ def multiply(a, i): ------- out : ndarray Output array of str or unicode, depending on input types - + + Examples + -------- + >>> a = np.array(["a", "b", "c"]) + >>> np.char.multiply(x, 3) + array(['aaa', 'bbb', 'ccc'], dtype='<U3') + >>> i = np.array([1, 2, 3]) + >>> np.char.multiply(a, i) + array(['a', 'bb', 'ccc'], dtype='<U3') + >>> np.char.multiply(np.array(['a']), i) + array(['a', 'aa', 'aaa'], dtype='<U3') + >>> a = np.array(['a', 'b', 'c', 'd', 'e', 'f']).reshape((2, 3)) + >>> np.char.multiply(a, 3) + array([['aaa', 'bbb', 'ccc'], + ['ddd', 'eee', 'fff']], dtype='<U3') + >>> np.char.multiply(a, i) + array([['a', 'bb', 'ccc'], + ['d', 'ee', 'fff']], dtype='<U3') """ a_arr = numpy.asarray(a) i_arr = numpy.asarray(i) @@ -342,7 +384,7 @@ def multiply(a, i): raise ValueError("Can only multiply by integers") out_size = _get_num_chars(a_arr) * max(int(i_arr.max()), 0) return _vec_string( - a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,)) + a_arr, type(a_arr.dtype)(out_size), '__mul__', (i_arr,)) def _mod_dispatcher(a, values): @@ -368,13 +410,13 @@ def mod(a, values): out : ndarray Output array of str or unicode, depending on input types - See also + See Also -------- str.__mod__ """ - return _to_string_or_unicode_array( - _vec_string(a, object_, '__mod__', (values,))) + return _to_bytes_or_str_array( + _vec_string(a, object_, '__mod__', (values,)), a) @array_function_dispatch(_unary_op_dispatcher) @@ -398,7 +440,7 @@ def capitalize(a): Output array of str or unicode, depending on input types - See also + See Also -------- str.capitalize @@ -443,18 +485,34 @@ def center(a, width, fillchar=' '): Output array of str or unicode, depending on input types - See also + See Also -------- str.center + + Notes + ----- + This function is intended to work with arrays of strings. The + fill character is not applied to numeric types. + + Examples + -------- + >>> c = np.array(['a1b2','1b2a','b2a1','2a1b']); c + array(['a1b2', '1b2a', 'b2a1', '2a1b'], dtype='<U4') + >>> np.char.center(c, width=9) + array([' a1b2 ', ' 1b2a ', ' b2a1 ', ' 2a1b '], dtype='<U9') + >>> np.char.center(c, width=9, fillchar='*') + array(['***a1b2**', '***1b2a**', '***b2a1**', '***2a1b**'], dtype='<U9') + >>> np.char.center(c, width=1) + array(['a', '1', 'b', '2'], dtype='<U1') """ a_arr = numpy.asarray(a) width_arr = numpy.asarray(width) size = int(numpy.max(width_arr.flat)) - if numpy.issubdtype(a_arr.dtype, numpy.string_): + if numpy.issubdtype(a_arr.dtype, numpy.bytes_): fillchar = asbytes(fillchar) return _vec_string( - a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar)) + a_arr, type(a_arr.dtype)(size), 'center', (width_arr, fillchar)) def _count_dispatcher(a, sub, start=None, end=None): @@ -485,7 +543,7 @@ def count(a, sub, start=0, end=None): out : ndarray Output array of ints. - See also + See Also -------- str.count @@ -513,8 +571,8 @@ def _code_dispatcher(a, encoding=None, errors=None): @array_function_dispatch(_code_dispatcher) def decode(a, encoding=None, errors=None): - """ - Calls `str.decode` element-wise. + r""" + Calls ``bytes.decode`` element-wise. The set of available codecs comes from the Python standard library, and may be extended at runtime. For more information, see the @@ -534,9 +592,9 @@ def decode(a, encoding=None, errors=None): ------- out : ndarray - See also + See Also -------- - str.decode + :py:meth:`bytes.decode` Notes ----- @@ -544,16 +602,16 @@ def decode(a, encoding=None, errors=None): Examples -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c = np.array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@', + ... b'\x81\x82\xc2\xc1\xc2\x82\x81']) >>> c + array([b'\x81\xc1\x81\xc1\x81\xc1', b'@@\x81\xc1@@', + ... b'\x81\x82\xc2\xc1\xc2\x82\x81'], dtype='|S7') + >>> np.char.decode(c, encoding='cp037') array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') - >>> np.char.encode(c, encoding='cp037') - array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@', - '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'], - dtype='|S7') """ - return _to_string_or_unicode_array( + return _to_bytes_or_str_array( _vec_string(a, object_, 'decode', _clean_args(encoding, errors))) @@ -580,7 +638,7 @@ def encode(a, encoding=None, errors=None): ------- out : ndarray - See also + See Also -------- str.encode @@ -589,7 +647,7 @@ def encode(a, encoding=None, errors=None): The type of the result will depend on the encoding specified. """ - return _to_string_or_unicode_array( + return _to_bytes_or_str_array( _vec_string(a, object_, 'encode', _clean_args(encoding, errors))) @@ -620,7 +678,7 @@ def endswith(a, suffix, start=0, end=None): out : ndarray Outputs an array of bools. - See also + See Also -------- str.endswith @@ -672,13 +730,13 @@ def expandtabs(a, tabsize=8): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.expandtabs """ - return _to_string_or_unicode_array( - _vec_string(a, object_, 'expandtabs', (tabsize,))) + return _to_bytes_or_str_array( + _vec_string(a, object_, 'expandtabs', (tabsize,)), a) @array_function_dispatch(_count_dispatcher) @@ -708,10 +766,16 @@ def find(a, sub, start=0, end=None): out : ndarray or int Output array of ints. Returns -1 if `sub` is not found. - See also + See Also -------- str.find + Examples + -------- + >>> a = np.array(["NumPy is a Python library"]) + >>> np.char.find(a, "Python", start=0, end=None) + array([11]) + """ return _vec_string( a, int_, 'find', [sub, start] + _clean_args(end)) @@ -737,10 +801,16 @@ def index(a, sub, start=0, end=None): out : ndarray Output array of ints. Returns -1 if `sub` is not found. - See also + See Also -------- find, str.find + Examples + -------- + >>> a = np.array(["Computer Science"]) + >>> np.char.index(a, "Science", start=0, end=None) + array([9]) + """ return _vec_string( a, int_, 'index', [sub, start] + _clean_args(end)) @@ -765,7 +835,7 @@ def isalnum(a): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.isalnum """ @@ -791,7 +861,7 @@ def isalpha(a): out : ndarray Output array of bools - See also + See Also -------- str.isalpha """ @@ -817,9 +887,18 @@ def isdigit(a): out : ndarray Output array of bools - See also + See Also -------- str.isdigit + + Examples + -------- + >>> a = np.array(['a', 'b', '0']) + >>> np.char.isdigit(a) + array([False, False, True]) + >>> a = np.array([['a', 'b', '0'], ['c', '1', '2']]) + >>> np.char.isdigit(a) + array([[False, False, True], [False, True, True]]) """ return _vec_string(a, bool_, 'isdigit') @@ -844,7 +923,7 @@ def islower(a): out : ndarray Output array of bools - See also + See Also -------- str.islower """ @@ -871,7 +950,7 @@ def isspace(a): out : ndarray Output array of bools - See also + See Also -------- str.isspace """ @@ -897,7 +976,7 @@ def istitle(a): out : ndarray Output array of bools - See also + See Also -------- str.istitle """ @@ -907,7 +986,7 @@ def istitle(a): @array_function_dispatch(_unary_op_dispatcher) def isupper(a): """ - Returns true for each element if all cased characters in the + Return true for each element if all cased characters in the string are uppercase and there is at least one character, false otherwise. @@ -924,9 +1003,19 @@ def isupper(a): out : ndarray Output array of bools - See also + See Also -------- str.isupper + + Examples + -------- + >>> str = "GHC" + >>> np.char.isupper(str) + array(True) + >>> a = np.array(["hello", "HELLO", "Hello"]) + >>> np.char.isupper(a) + array([False, True, False]) + """ return _vec_string(a, bool_, 'isupper') @@ -953,12 +1042,21 @@ def join(sep, seq): out : ndarray Output array of str or unicode, depending on input types - See also + See Also -------- str.join + + Examples + -------- + >>> np.char.join('-', 'osd') + array('o-s-d', dtype='<U5') + + >>> np.char.join(['-', '.'], ['ghc', 'osd']) + array(['g-h-c', 'o.s.d'], dtype='<U5') + """ - return _to_string_or_unicode_array( - _vec_string(sep, object_, 'join', (seq,))) + return _to_bytes_or_str_array( + _vec_string(sep, object_, 'join', (seq,)), seq) @@ -988,7 +1086,7 @@ def ljust(a, width, fillchar=' '): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.ljust @@ -996,10 +1094,10 @@ def ljust(a, width, fillchar=' '): a_arr = numpy.asarray(a) width_arr = numpy.asarray(width) size = int(numpy.max(width_arr.flat)) - if numpy.issubdtype(a_arr.dtype, numpy.string_): + if numpy.issubdtype(a_arr.dtype, numpy.bytes_): fillchar = asbytes(fillchar) return _vec_string( - a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar)) + a_arr, type(a_arr.dtype)(size), 'ljust', (width_arr, fillchar)) @array_function_dispatch(_unary_op_dispatcher) @@ -1021,7 +1119,7 @@ def lower(a): out : ndarray, {str, unicode} Output array of str or unicode, depending on input type - See also + See Also -------- str.lower @@ -1066,7 +1164,7 @@ def lstrip(a, chars=None): out : ndarray, {str, unicode} Output array of str or unicode, depending on input type - See also + See Also -------- str.lstrip @@ -1127,13 +1225,13 @@ def partition(a, sep): The output array will have an extra dimension with 3 elements per input element. - See also + See Also -------- str.partition """ - return _to_string_or_unicode_array( - _vec_string(a, object_, 'partition', (sep,))) + return _to_bytes_or_str_array( + _vec_string(a, object_, 'partition', (sep,)), a) def _replace_dispatcher(a, old, new, count=None): @@ -1163,14 +1261,22 @@ def replace(a, old, new, count=None): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.replace + + Examples + -------- + >>> a = np.array(["That is a mango", "Monkeys eat mangos"]) + >>> np.char.replace(a, 'mango', 'banana') + array(['That is a banana', 'Monkeys eat bananas'], dtype='<U19') + >>> a = np.array(["The dish is fresh", "This is it"]) + >>> np.char.replace(a, 'is', 'was') + array(['The dwash was fresh', 'Thwas was it'], dtype='<U19') """ - return _to_string_or_unicode_array( - _vec_string( - a, object_, 'replace', [old, new] + _clean_args(count))) + return _to_bytes_or_str_array( + _vec_string(a, object_, 'replace', [old, new] + _clean_args(count)), a) @array_function_dispatch(_count_dispatcher) @@ -1197,7 +1303,7 @@ def rfind(a, sub, start=0, end=None): out : ndarray Output array of ints. Return -1 on failure. - See also + See Also -------- str.rfind @@ -1227,7 +1333,7 @@ def rindex(a, sub, start=0, end=None): out : ndarray Output array of ints. - See also + See Also -------- rfind, str.rindex @@ -1258,7 +1364,7 @@ def rjust(a, width, fillchar=' '): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.rjust @@ -1266,10 +1372,10 @@ def rjust(a, width, fillchar=' '): a_arr = numpy.asarray(a) width_arr = numpy.asarray(width) size = int(numpy.max(width_arr.flat)) - if numpy.issubdtype(a_arr.dtype, numpy.string_): + if numpy.issubdtype(a_arr.dtype, numpy.bytes_): fillchar = asbytes(fillchar) return _vec_string( - a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar)) + a_arr, type(a_arr.dtype)(size), 'rjust', (width_arr, fillchar)) @array_function_dispatch(_partition_dispatcher) @@ -1299,13 +1405,13 @@ def rpartition(a, sep): type. The output array will have an extra dimension with 3 elements per input element. - See also + See Also -------- str.rpartition """ - return _to_string_or_unicode_array( - _vec_string(a, object_, 'rpartition', (sep,))) + return _to_bytes_or_str_array( + _vec_string(a, object_, 'rpartition', (sep,)), a) def _split_dispatcher(a, sep=None, maxsplit=None): @@ -1339,7 +1445,7 @@ def rsplit(a, sep=None, maxsplit=None): out : ndarray Array of list objects - See also + See Also -------- str.rsplit, split @@ -1378,7 +1484,7 @@ def rstrip(a, chars=None): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.rstrip @@ -1423,7 +1529,7 @@ def split(a, sep=None, maxsplit=None): out : ndarray Array of list objects - See also + See Also -------- str.split, rsplit @@ -1459,7 +1565,7 @@ def splitlines(a, keepends=None): out : ndarray Array of list objects - See also + See Also -------- str.splitlines @@ -1495,7 +1601,7 @@ def startswith(a, prefix, start=0, end=None): out : ndarray Array of booleans - See also + See Also -------- str.startswith @@ -1528,7 +1634,7 @@ def strip(a, chars=None): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.strip @@ -1569,7 +1675,7 @@ def swapcase(a): out : ndarray, {str, unicode} Output array of str or unicode, depending on input type - See also + See Also -------- str.swapcase @@ -1609,7 +1715,7 @@ def title(a): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.title @@ -1654,13 +1760,13 @@ def translate(a, table, deletechars=None): out : ndarray Output array of str or unicode, depending on input type - See also + See Also -------- str.translate """ a_arr = numpy.asarray(a) - if issubclass(a_arr.dtype.type, unicode_): + if issubclass(a_arr.dtype.type, str_): return _vec_string( a_arr, a_arr.dtype, 'translate', (table,)) else: @@ -1687,7 +1793,7 @@ def upper(a): out : ndarray, {str, unicode} Output array of str or unicode, depending on input type - See also + See Also -------- str.upper @@ -1726,7 +1832,7 @@ def zfill(a, width): out : ndarray, {str, unicode} Output array of str or unicode, depending on input type - See also + See Also -------- str.zfill @@ -1735,7 +1841,7 @@ def zfill(a, width): width_arr = numpy.asarray(width) size = int(numpy.max(width_arr.flat)) return _vec_string( - a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,)) + a_arr, type(a_arr.dtype)(size), 'zfill', (width_arr,)) @array_function_dispatch(_unary_op_dispatcher) @@ -1744,7 +1850,7 @@ def isnumeric(a): For each element, return True if there are only numeric characters in the element. - Calls `unicode.isnumeric` element-wise. + Calls `str.isnumeric` element-wise. Numeric characters include digit characters, and all characters that have the Unicode numeric value property, e.g. ``U+2155, @@ -1760,12 +1866,17 @@ def isnumeric(a): out : ndarray, bool Array of booleans of same shape as `a`. - See also + See Also + -------- + str.isnumeric + + Examples -------- - unicode.isnumeric + >>> np.char.isnumeric(['123', '123abc', '9.0', '1/4', 'VIII']) + array([ True, False, False, False, False]) """ - if _use_unicode(a) != unicode_: + if not _is_unicode(a): raise TypeError("isnumeric is only available for Unicode strings and arrays") return _vec_string(a, bool_, 'isnumeric') @@ -1776,7 +1887,7 @@ def isdecimal(a): For each element, return True if there are only decimal characters in the element. - Calls `unicode.isdecimal` element-wise. + Calls `str.isdecimal` element-wise. Decimal characters include digit characters, and all characters that can be used to form decimal-radix numbers, @@ -1792,13 +1903,19 @@ def isdecimal(a): out : ndarray, bool Array of booleans identical in shape to `a`. - See also + See Also -------- - unicode.isdecimal + str.isdecimal - """ - if _use_unicode(a) != unicode_: - raise TypeError("isnumeric is only available for Unicode strings and arrays") + Examples + -------- + >>> np.char.isdecimal(['12345', '4.99', '123ABC', '']) + array([ True, False, False, False]) + + """ + if not _is_unicode(a): + raise TypeError( + "isdecimal is only available for Unicode strings and arrays") return _vec_string(a, bool_, 'isdecimal') @@ -1814,7 +1931,7 @@ class chararray(ndarray): The `chararray` class exists for backwards compatibility with Numarray, it is not recommended for new development. Starting from numpy 1.4, if one needs arrays of strings, it is recommended to use arrays of - `dtype` `object_`, `string_` or `unicode_`, and use the free functions + `dtype` `object_`, `bytes_` or `str_`, and use the free functions in the `numpy.char` module for fast vectorized string operations. Versus a regular NumPy array of type `str` or `unicode`, this @@ -1948,9 +2065,9 @@ class chararray(ndarray): global _globalvar if unicode: - dtype = unicode_ + dtype = str_ else: - dtype = string_ + dtype = bytes_ # force itemsize to be a Python int, since using NumPy integer # types results in itemsize.itemsize being used as the size of @@ -2004,7 +2121,7 @@ class chararray(ndarray): """ Return (self == other) element-wise. - See also + See Also -------- equal """ @@ -2014,7 +2131,7 @@ class chararray(ndarray): """ Return (self != other) element-wise. - See also + See Also -------- not_equal """ @@ -2024,7 +2141,7 @@ class chararray(ndarray): """ Return (self >= other) element-wise. - See also + See Also -------- greater_equal """ @@ -2034,7 +2151,7 @@ class chararray(ndarray): """ Return (self <= other) element-wise. - See also + See Also -------- less_equal """ @@ -2044,7 +2161,7 @@ class chararray(ndarray): """ Return (self > other) element-wise. - See also + See Also -------- greater """ @@ -2054,7 +2171,7 @@ class chararray(ndarray): """ Return (self < other) element-wise. - See also + See Also -------- less """ @@ -2065,7 +2182,7 @@ class chararray(ndarray): Return (self + other), that is string concatenation, element-wise for a pair of array_likes of str or unicode. - See also + See Also -------- add """ @@ -2074,9 +2191,9 @@ class chararray(ndarray): def __radd__(self, other): """ Return (other + self), that is string concatenation, - element-wise for a pair of array_likes of `string_` or `unicode_`. + element-wise for a pair of array_likes of `bytes_` or `str_`. - See also + See Also -------- add """ @@ -2087,7 +2204,7 @@ class chararray(ndarray): Return (self * i), that is string multiple concatenation, element-wise. - See also + See Also -------- multiply """ @@ -2098,7 +2215,7 @@ class chararray(ndarray): Return (self * i), that is string multiple concatenation, element-wise. - See also + See Also -------- multiply """ @@ -2107,10 +2224,10 @@ class chararray(ndarray): def __mod__(self, i): """ Return (self % i), that is pre-Python 2.6 string formatting - (interpolation), element-wise for a pair of array_likes of `string_` - or `unicode_`. + (interpolation), element-wise for a pair of array_likes of `bytes_` + or `str_`. - See also + See Also -------- mod """ @@ -2145,7 +2262,7 @@ class chararray(ndarray): Return a copy of `self` with only the first character of each element capitalized. - See also + See Also -------- char.capitalize @@ -2157,7 +2274,7 @@ class chararray(ndarray): Return a copy of `self` with its elements centered in a string of length `width`. - See also + See Also -------- center """ @@ -2168,7 +2285,7 @@ class chararray(ndarray): Returns an array with the number of non-overlapping occurrences of substring `sub` in the range [`start`, `end`]. - See also + See Also -------- char.count @@ -2177,9 +2294,9 @@ class chararray(ndarray): def decode(self, encoding=None, errors=None): """ - Calls `str.decode` element-wise. + Calls ``bytes.decode`` element-wise. - See also + See Also -------- char.decode @@ -2190,7 +2307,7 @@ class chararray(ndarray): """ Calls `str.encode` element-wise. - See also + See Also -------- char.encode @@ -2202,7 +2319,7 @@ class chararray(ndarray): Returns a boolean array which is `True` where the string element in `self` ends with `suffix`, otherwise `False`. - See also + See Also -------- char.endswith @@ -2214,7 +2331,7 @@ class chararray(ndarray): Return a copy of each string element where all tab characters are replaced by one or more spaces. - See also + See Also -------- char.expandtabs @@ -2226,7 +2343,7 @@ class chararray(ndarray): For each element, return the lowest index in the string where substring `sub` is found. - See also + See Also -------- char.find @@ -2237,7 +2354,7 @@ class chararray(ndarray): """ Like `find`, but raises `ValueError` when the substring is not found. - See also + See Also -------- char.index @@ -2250,7 +2367,7 @@ class chararray(ndarray): are alphanumeric and there is at least one character, false otherwise. - See also + See Also -------- char.isalnum @@ -2263,7 +2380,7 @@ class chararray(ndarray): are alphabetic and there is at least one character, false otherwise. - See also + See Also -------- char.isalpha @@ -2275,7 +2392,7 @@ class chararray(ndarray): Returns true for each element if all characters in the string are digits and there is at least one character, false otherwise. - See also + See Also -------- char.isdigit @@ -2288,7 +2405,7 @@ class chararray(ndarray): string are lowercase and there is at least one cased character, false otherwise. - See also + See Also -------- char.islower @@ -2301,7 +2418,7 @@ class chararray(ndarray): characters in the string and there is at least one character, false otherwise. - See also + See Also -------- char.isspace @@ -2313,7 +2430,7 @@ class chararray(ndarray): Returns true for each element if the element is a titlecased string and there is at least one character, false otherwise. - See also + See Also -------- char.istitle @@ -2326,7 +2443,7 @@ class chararray(ndarray): string are uppercase and there is at least one character, false otherwise. - See also + See Also -------- char.isupper @@ -2338,7 +2455,7 @@ class chararray(ndarray): Return a string which is the concatenation of the strings in the sequence `seq`. - See also + See Also -------- char.join @@ -2350,7 +2467,7 @@ class chararray(ndarray): Return an array with the elements of `self` left-justified in a string of length `width`. - See also + See Also -------- char.ljust @@ -2362,7 +2479,7 @@ class chararray(ndarray): Return an array with the elements of `self` converted to lowercase. - See also + See Also -------- char.lower @@ -2374,7 +2491,7 @@ class chararray(ndarray): For each element in `self`, return a copy with the leading characters removed. - See also + See Also -------- char.lstrip @@ -2385,7 +2502,7 @@ class chararray(ndarray): """ Partition each element in `self` around `sep`. - See also + See Also -------- partition """ @@ -2396,7 +2513,7 @@ class chararray(ndarray): For each element in `self`, return a copy of the string with all occurrences of substring `old` replaced by `new`. - See also + See Also -------- char.replace @@ -2409,7 +2526,7 @@ class chararray(ndarray): where substring `sub` is found, such that `sub` is contained within [`start`, `end`]. - See also + See Also -------- char.rfind @@ -2421,7 +2538,7 @@ class chararray(ndarray): Like `rfind`, but raises `ValueError` when the substring `sub` is not found. - See also + See Also -------- char.rindex @@ -2433,7 +2550,7 @@ class chararray(ndarray): Return an array with the elements of `self` right-justified in a string of length `width`. - See also + See Also -------- char.rjust @@ -2444,7 +2561,7 @@ class chararray(ndarray): """ Partition each element in `self` around `sep`. - See also + See Also -------- rpartition """ @@ -2455,7 +2572,7 @@ class chararray(ndarray): For each element in `self`, return a list of the words in the string, using `sep` as the delimiter string. - See also + See Also -------- char.rsplit @@ -2467,7 +2584,7 @@ class chararray(ndarray): For each element in `self`, return a copy with the trailing characters removed. - See also + See Also -------- char.rstrip @@ -2479,7 +2596,7 @@ class chararray(ndarray): For each element in `self`, return a list of the words in the string, using `sep` as the delimiter string. - See also + See Also -------- char.split @@ -2491,7 +2608,7 @@ class chararray(ndarray): For each element in `self`, return a list of the lines in the element, breaking at line boundaries. - See also + See Also -------- char.splitlines @@ -2503,7 +2620,7 @@ class chararray(ndarray): Returns a boolean array which is `True` where the string element in `self` starts with `prefix`, otherwise `False`. - See also + See Also -------- char.startswith @@ -2515,7 +2632,7 @@ class chararray(ndarray): For each element in `self`, return a copy with the leading and trailing characters removed. - See also + See Also -------- char.strip @@ -2527,7 +2644,7 @@ class chararray(ndarray): For each element in `self`, return a copy of the string with uppercase characters converted to lowercase and vice versa. - See also + See Also -------- char.swapcase @@ -2540,7 +2657,7 @@ class chararray(ndarray): string: words start with uppercase characters, all remaining cased characters are lowercase. - See also + See Also -------- char.title @@ -2554,7 +2671,7 @@ class chararray(ndarray): `deletechars` are removed, and the remaining characters have been mapped through the given translation table. - See also + See Also -------- char.translate @@ -2566,7 +2683,7 @@ class chararray(ndarray): Return an array with the elements of `self` converted to uppercase. - See also + See Also -------- char.upper @@ -2578,7 +2695,7 @@ class chararray(ndarray): Return the numeric string left-filled with zeros in a string of length `width`. - See also + See Also -------- char.zfill @@ -2590,7 +2707,7 @@ class chararray(ndarray): For each element in `self`, return True if there are only numeric characters in the element. - See also + See Also -------- char.isnumeric @@ -2602,7 +2719,7 @@ class chararray(ndarray): For each element in `self`, return True if there are only decimal characters in the element. - See also + See Also -------- char.isdecimal @@ -2610,6 +2727,7 @@ class chararray(ndarray): return isdecimal(self) +@set_module("numpy.char") def array(obj, itemsize=None, copy=True, unicode=None, order=None): """ Create a `chararray`. @@ -2617,7 +2735,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None): .. note:: This class is provided for numarray backward-compatibility. New code (not concerned with numarray compatibility) should use - arrays of type `string_` or `unicode_` and use the free functions + arrays of type `bytes_` or `str_` and use the free functions in :mod:`numpy.char <numpy.core.defchararray>` for fast vectorized string operations instead. @@ -2700,26 +2818,26 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None): # itemsize is in 8-bit chars, so for Unicode, we need # to divide by the size of a single Unicode character, # which for NumPy is always 4 - if issubclass(obj.dtype.type, unicode_): + if issubclass(obj.dtype.type, str_): itemsize //= 4 if unicode is None: - if issubclass(obj.dtype.type, unicode_): + if issubclass(obj.dtype.type, str_): unicode = True else: unicode = False if unicode: - dtype = unicode_ + dtype = str_ else: - dtype = string_ + dtype = bytes_ if order is not None: obj = numpy.asarray(obj, order=order) if (copy or (itemsize != obj.itemsize) or - (not unicode and isinstance(obj, unicode_)) or - (unicode and isinstance(obj, string_))): + (not unicode and isinstance(obj, str_)) or + (unicode and isinstance(obj, bytes_))): obj = obj.astype((dtype, int(itemsize))) return obj @@ -2732,9 +2850,9 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None): # Fall through to the default case if unicode: - dtype = unicode_ + dtype = str_ else: - dtype = string_ + dtype = bytes_ if itemsize is None: val = narray(obj, dtype=dtype, order=order, subok=True) @@ -2743,6 +2861,7 @@ def array(obj, itemsize=None, copy=True, unicode=None, order=None): return val.view(chararray) +@set_module("numpy.char") def asarray(obj, itemsize=None, unicode=None, order=None): """ Convert the input to a `chararray`, copying the data only if |