diff options
author | Eric Wieser <wieser.eric@gmail.com> | 2019-04-16 01:32:35 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2019-04-16 01:32:35 -0700 |
commit | 9af2340580bcbacc06b1079df3e9b8abf90b7657 (patch) | |
tree | dd8041d48e8cd9b3cc5ddcdab9e0ba851a0b4a9a /numpy/core/defchararray.py | |
parent | 389bd44e32b0eace0d024b126931a0a00d14cffe (diff) | |
parent | cc94f360febdef0e6c4183c50555ba82e60ccff6 (diff) | |
download | numpy-9af2340580bcbacc06b1079df3e9b8abf90b7657.tar.gz |
Merge branch 'master' into poly1d-fixes-fixes-fixes-fixes
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r-- | numpy/core/defchararray.py | 230 |
1 files changed, 185 insertions, 45 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index a11b343cb..3fd7d14c4 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -17,16 +17,19 @@ The preferred alias for `defchararray` is `numpy.char`. """ from __future__ import division, absolute_import, print_function +import functools import sys from .numerictypes import string_, unicode_, integer, object_, bool_, character from .numeric import ndarray, compare_chararrays from .numeric import array as narray from numpy.core.multiarray import _vec_string +from numpy.core.overrides import set_module +from numpy.core import overrides from numpy.compat import asbytes, long import numpy __all__ = [ - 'chararray', 'equal', 'not_equal', 'greater_equal', 'less_equal', + 'equal', 'not_equal', 'greater_equal', 'less_equal', 'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize', 'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs', 'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace', @@ -47,6 +50,10 @@ else: _bytes = str _len = len +array_function_dispatch = functools.partial( + overrides.array_function_dispatch, module='numpy.char') + + def _use_unicode(*args): """ Helper function for determining the output type of some string @@ -95,6 +102,11 @@ def _get_num_chars(a): return a.itemsize +def _binary_op_dispatcher(x1, x2): + return (x1, x2) + + +@array_function_dispatch(_binary_op_dispatcher) def equal(x1, x2): """ Return (x1 == x2) element-wise. @@ -119,6 +131,8 @@ def equal(x1, x2): """ return compare_chararrays(x1, x2, '==', True) + +@array_function_dispatch(_binary_op_dispatcher) def not_equal(x1, x2): """ Return (x1 != x2) element-wise. @@ -143,6 +157,8 @@ def not_equal(x1, x2): """ return compare_chararrays(x1, x2, '!=', True) + +@array_function_dispatch(_binary_op_dispatcher) def greater_equal(x1, x2): """ Return (x1 >= x2) element-wise. @@ -168,6 +184,8 @@ def greater_equal(x1, x2): """ return compare_chararrays(x1, x2, '>=', True) + +@array_function_dispatch(_binary_op_dispatcher) def less_equal(x1, x2): """ Return (x1 <= x2) element-wise. @@ -192,6 +210,8 @@ def less_equal(x1, x2): """ return compare_chararrays(x1, x2, '<=', True) + +@array_function_dispatch(_binary_op_dispatcher) def greater(x1, x2): """ Return (x1 > x2) element-wise. @@ -216,6 +236,8 @@ def greater(x1, x2): """ return compare_chararrays(x1, x2, '>', True) + +@array_function_dispatch(_binary_op_dispatcher) def less(x1, x2): """ Return (x1 < x2) element-wise. @@ -240,6 +262,12 @@ def less(x1, x2): """ return compare_chararrays(x1, x2, '<', True) + +def _unary_op_dispatcher(a): + return (a,) + + +@array_function_dispatch(_unary_op_dispatcher) def str_len(a): """ Return len(a) element-wise. @@ -259,6 +287,8 @@ def str_len(a): """ return _vec_string(a, integer, '__len__') + +@array_function_dispatch(_binary_op_dispatcher) def add(x1, x2): """ Return element-wise string concatenation for two arrays of str or unicode. @@ -285,6 +315,12 @@ def add(x1, x2): dtype = _use_unicode(arr1, arr2) return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,)) + +def _multiply_dispatcher(a, i): + return (a,) + + +@array_function_dispatch(_multiply_dispatcher) def multiply(a, i): """ Return (a * i), that is string multiple concatenation, @@ -313,6 +349,12 @@ def multiply(a, i): return _vec_string( a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,)) + +def _mod_dispatcher(a, values): + return (a, values) + + +@array_function_dispatch(_mod_dispatcher) def mod(a, values): """ Return (a % i), that is pre-Python 2.6 string formatting @@ -339,6 +381,8 @@ def mod(a, values): return _to_string_or_unicode_array( _vec_string(a, object_, '__mod__', (values,))) + +@array_function_dispatch(_unary_op_dispatcher) def capitalize(a): """ Return a copy of `a` with only the first character of each element @@ -377,6 +421,11 @@ def capitalize(a): return _vec_string(a_arr, a_arr.dtype, 'capitalize') +def _center_dispatcher(a, width, fillchar=None): + return (a,) + + +@array_function_dispatch(_center_dispatcher) def center(a, width, fillchar=' '): """ Return a copy of `a` with its elements centered in a string of @@ -413,6 +462,11 @@ def center(a, width, fillchar=' '): a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar)) +def _count_dispatcher(a, sub, start=None, end=None): + return (a,) + + +@array_function_dispatch(_count_dispatcher) def count(a, sub, start=0, end=None): """ Returns an array with the number of non-overlapping occurrences of @@ -444,8 +498,7 @@ def count(a, sub, start=0, end=None): -------- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c - array(['aAaAaA', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') >>> np.char.count(c, 'A') array([3, 1, 1]) >>> np.char.count(c, 'aA') @@ -459,6 +512,11 @@ def count(a, sub, start=0, end=None): return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end)) +def _code_dispatcher(a, encoding=None, errors=None): + return (a,) + + +@array_function_dispatch(_code_dispatcher) def decode(a, encoding=None, errors=None): """ Calls `str.decode` element-wise. @@ -493,8 +551,7 @@ def decode(a, encoding=None, errors=None): -------- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c - array(['aAaAaA', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') >>> np.char.encode(c, encoding='cp037') array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@', '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'], @@ -505,6 +562,7 @@ def decode(a, encoding=None, errors=None): _vec_string(a, object_, 'decode', _clean_args(encoding, errors))) +@array_function_dispatch(_code_dispatcher) def encode(a, encoding=None, errors=None): """ Calls `str.encode` element-wise. @@ -540,6 +598,11 @@ def encode(a, encoding=None, errors=None): _vec_string(a, object_, 'encode', _clean_args(encoding, errors))) +def _endswith_dispatcher(a, suffix, start=None, end=None): + return (a,) + + +@array_function_dispatch(_endswith_dispatcher) def endswith(a, suffix, start=0, end=None): """ Returns a boolean array which is `True` where the string element @@ -572,18 +635,22 @@ def endswith(a, suffix, start=0, end=None): >>> s[0] = 'foo' >>> s[1] = 'bar' >>> s - array(['foo', 'bar'], - dtype='|S3') + array(['foo', 'bar'], dtype='<U3') >>> np.char.endswith(s, 'ar') - array([False, True], dtype=bool) + array([False, True]) >>> np.char.endswith(s, 'a', start=1, end=2) - array([False, True], dtype=bool) + array([False, True]) """ return _vec_string( a, bool_, 'endswith', [suffix, start] + _clean_args(end)) +def _expandtabs_dispatcher(a, tabsize=None): + return (a,) + + +@array_function_dispatch(_expandtabs_dispatcher) def expandtabs(a, tabsize=8): """ Return a copy of each string element where all tab characters are @@ -619,6 +686,7 @@ def expandtabs(a, tabsize=8): _vec_string(a, object_, 'expandtabs', (tabsize,))) +@array_function_dispatch(_count_dispatcher) def find(a, sub, start=0, end=None): """ For each element, return the lowest index in the string where @@ -654,6 +722,7 @@ def find(a, sub, start=0, end=None): a, integer, 'find', [sub, start] + _clean_args(end)) +@array_function_dispatch(_count_dispatcher) def index(a, sub, start=0, end=None): """ Like `find`, but raises `ValueError` when the substring is not found. @@ -681,6 +750,8 @@ def index(a, sub, start=0, end=None): return _vec_string( a, integer, 'index', [sub, start] + _clean_args(end)) + +@array_function_dispatch(_unary_op_dispatcher) def isalnum(a): """ Returns true for each element if all characters in the string are @@ -705,6 +776,8 @@ def isalnum(a): """ return _vec_string(a, bool_, 'isalnum') + +@array_function_dispatch(_unary_op_dispatcher) def isalpha(a): """ Returns true for each element if all characters in the string are @@ -729,6 +802,8 @@ def isalpha(a): """ return _vec_string(a, bool_, 'isalpha') + +@array_function_dispatch(_unary_op_dispatcher) def isdigit(a): """ Returns true for each element if all characters in the string are @@ -753,6 +828,8 @@ def isdigit(a): """ return _vec_string(a, bool_, 'isdigit') + +@array_function_dispatch(_unary_op_dispatcher) def islower(a): """ Returns true for each element if all cased characters in the @@ -778,6 +855,8 @@ def islower(a): """ return _vec_string(a, bool_, 'islower') + +@array_function_dispatch(_unary_op_dispatcher) def isspace(a): """ Returns true for each element if there are only whitespace @@ -803,6 +882,8 @@ def isspace(a): """ return _vec_string(a, bool_, 'isspace') + +@array_function_dispatch(_unary_op_dispatcher) def istitle(a): """ Returns true for each element if the element is a titlecased @@ -827,6 +908,8 @@ def istitle(a): """ return _vec_string(a, bool_, 'istitle') + +@array_function_dispatch(_unary_op_dispatcher) def isupper(a): """ Returns true for each element if all cased characters in the @@ -852,6 +935,12 @@ def isupper(a): """ return _vec_string(a, bool_, 'isupper') + +def _join_dispatcher(sep, seq): + return (sep, seq) + + +@array_function_dispatch(_join_dispatcher) def join(sep, seq): """ Return a string which is the concatenation of the strings in the @@ -877,6 +966,12 @@ def join(sep, seq): _vec_string(sep, object_, 'join', (seq,))) + +def _just_dispatcher(a, width, fillchar=None): + return (a,) + + +@array_function_dispatch(_just_dispatcher) def ljust(a, width, fillchar=' '): """ Return an array with the elements of `a` left-justified in a @@ -912,6 +1007,7 @@ def ljust(a, width, fillchar=' '): a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar)) +@array_function_dispatch(_unary_op_dispatcher) def lower(a): """ Return an array with the elements converted to lowercase. @@ -937,17 +1033,20 @@ def lower(a): Examples -------- >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c - array(['A1B C', '1BCA', 'BCA1'], - dtype='|S5') + array(['A1B C', '1BCA', 'BCA1'], dtype='<U5') >>> np.char.lower(c) - array(['a1b c', '1bca', 'bca1'], - dtype='|S5') + array(['a1b c', '1bca', 'bca1'], dtype='<U5') """ a_arr = numpy.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'lower') +def _strip_dispatcher(a, chars=None): + return (a,) + + +@array_function_dispatch(_strip_dispatcher) def lstrip(a, chars=None): """ For each element in `a`, return a copy with the leading characters @@ -980,23 +1079,20 @@ def lstrip(a, chars=None): -------- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c - array(['aAaAaA', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') The 'a' variable is unstripped from c[1] because whitespace leading. >>> np.char.lstrip(c, 'a') - array(['AaAaA', ' aA ', 'bBABba'], - dtype='|S7') + array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7') >>> np.char.lstrip(c, 'A') # leaves c unchanged - array(['aAaAaA', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all() - ... # XXX: is this a regression? this line now returns False + ... # XXX: is this a regression? This used to return True ... # np.char.lstrip(c,'') does not modify c at all. - True + False >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all() True @@ -1005,6 +1101,11 @@ def lstrip(a, chars=None): return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,)) +def _partition_dispatcher(a, sep): + return (a,) + + +@array_function_dispatch(_partition_dispatcher) def partition(a, sep): """ Partition each element in `a` around `sep`. @@ -1040,6 +1141,11 @@ def partition(a, sep): _vec_string(a, object_, 'partition', (sep,))) +def _replace_dispatcher(a, old, new, count=None): + return (a,) + + +@array_function_dispatch(_replace_dispatcher) def replace(a, old, new, count=None): """ For each element in `a`, return a copy of the string with all @@ -1072,6 +1178,7 @@ def replace(a, old, new, count=None): a, object_, 'replace', [old, new] + _clean_args(count))) +@array_function_dispatch(_count_dispatcher) def rfind(a, sub, start=0, end=None): """ For each element in `a`, return the highest index in the string @@ -1104,6 +1211,7 @@ def rfind(a, sub, start=0, end=None): a, integer, 'rfind', [sub, start] + _clean_args(end)) +@array_function_dispatch(_count_dispatcher) def rindex(a, sub, start=0, end=None): """ Like `rfind`, but raises `ValueError` when the substring `sub` is @@ -1133,6 +1241,7 @@ def rindex(a, sub, start=0, end=None): a, integer, 'rindex', [sub, start] + _clean_args(end)) +@array_function_dispatch(_just_dispatcher) def rjust(a, width, fillchar=' '): """ Return an array with the elements of `a` right-justified in a @@ -1168,6 +1277,7 @@ def rjust(a, width, fillchar=' '): a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar)) +@array_function_dispatch(_partition_dispatcher) def rpartition(a, sep): """ Partition (split) each element around the right-most separator. @@ -1203,6 +1313,11 @@ def rpartition(a, sep): _vec_string(a, object_, 'rpartition', (sep,))) +def _split_dispatcher(a, sep=None, maxsplit=None): + return (a,) + + +@array_function_dispatch(_split_dispatcher) def rsplit(a, sep=None, maxsplit=None): """ For each element in `a`, return a list of the words in the @@ -1240,6 +1355,11 @@ def rsplit(a, sep=None, maxsplit=None): a, object_, 'rsplit', [sep] + _clean_args(maxsplit)) +def _strip_dispatcher(a, chars=None): + return (a,) + + +@array_function_dispatch(_strip_dispatcher) def rstrip(a, chars=None): """ For each element in `a`, return a copy with the trailing @@ -1272,10 +1392,10 @@ def rstrip(a, chars=None): >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c array(['aAaAaA', 'abBABba'], dtype='|S7') - >>> np.char.rstrip(c, 'a') + >>> np.char.rstrip(c, b'a') array(['aAaAaA', 'abBABb'], dtype='|S7') - >>> np.char.rstrip(c, 'A') + >>> np.char.rstrip(c, b'A') array(['aAaAa', 'abBABba'], dtype='|S7') @@ -1284,12 +1404,13 @@ def rstrip(a, chars=None): return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,)) +@array_function_dispatch(_split_dispatcher) def split(a, sep=None, maxsplit=None): """ For each element in `a`, return a list of the words in the string, using `sep` as the delimiter string. - Calls `str.rsplit` element-wise. + Calls `str.split` element-wise. Parameters ---------- @@ -1318,6 +1439,11 @@ def split(a, sep=None, maxsplit=None): a, object_, 'split', [sep] + _clean_args(maxsplit)) +def _splitlines_dispatcher(a, keepends=None): + return (a,) + + +@array_function_dispatch(_splitlines_dispatcher) def splitlines(a, keepends=None): """ For each element in `a`, return a list of the lines in the @@ -1347,6 +1473,11 @@ def splitlines(a, keepends=None): a, object_, 'splitlines', _clean_args(keepends)) +def _startswith_dispatcher(a, prefix, start=None, end=None): + return (a,) + + +@array_function_dispatch(_startswith_dispatcher) def startswith(a, prefix, start=0, end=None): """ Returns a boolean array which is `True` where the string element @@ -1378,12 +1509,13 @@ def startswith(a, prefix, start=0, end=None): a, bool_, 'startswith', [prefix, start] + _clean_args(end)) +@array_function_dispatch(_strip_dispatcher) def strip(a, chars=None): """ For each element in `a`, return a copy with the leading and trailing characters removed. - Calls `str.rstrip` element-wise. + Calls `str.strip` element-wise. Parameters ---------- @@ -1409,23 +1541,20 @@ def strip(a, chars=None): -------- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) >>> c - array(['aAaAaA', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7') >>> np.char.strip(c) - array(['aAaAaA', 'aA', 'abBABba'], - dtype='|S7') + array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7') >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads - array(['AaAaA', ' aA ', 'bBABb'], - dtype='|S7') + array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7') >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails - array(['aAaAa', ' aA ', 'abBABba'], - dtype='|S7') + array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7') """ a_arr = numpy.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars)) +@array_function_dispatch(_unary_op_dispatcher) def swapcase(a): """ Return element-wise a copy of the string with @@ -1463,6 +1592,7 @@ def swapcase(a): return _vec_string(a_arr, a_arr.dtype, 'swapcase') +@array_function_dispatch(_unary_op_dispatcher) def title(a): """ Return element-wise title cased version of string or unicode. @@ -1502,6 +1632,11 @@ def title(a): return _vec_string(a_arr, a_arr.dtype, 'title') +def _translate_dispatcher(a, table, deletechars=None): + return (a,) + + +@array_function_dispatch(_translate_dispatcher) def translate(a, table, deletechars=None): """ For each element in `a`, return a copy of the string where all @@ -1538,6 +1673,7 @@ def translate(a, table, deletechars=None): a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars)) +@array_function_dispatch(_unary_op_dispatcher) def upper(a): """ Return an array with the elements converted to uppercase. @@ -1563,17 +1699,20 @@ def upper(a): Examples -------- >>> c = np.array(['a1b c', '1bca', 'bca1']); c - array(['a1b c', '1bca', 'bca1'], - dtype='|S5') + array(['a1b c', '1bca', 'bca1'], dtype='<U5') >>> np.char.upper(c) - array(['A1B C', '1BCA', 'BCA1'], - dtype='|S5') + array(['A1B C', '1BCA', 'BCA1'], dtype='<U5') """ a_arr = numpy.asarray(a) return _vec_string(a_arr, a_arr.dtype, 'upper') +def _zfill_dispatcher(a, width): + return (a,) + + +@array_function_dispatch(_zfill_dispatcher) def zfill(a, width): """ Return the numeric string left-filled with zeros @@ -1604,6 +1743,7 @@ def zfill(a, width): a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,)) +@array_function_dispatch(_unary_op_dispatcher) def isnumeric(a): """ For each element, return True if there are only numeric @@ -1635,6 +1775,7 @@ def isnumeric(a): return _vec_string(a, bool_, 'isnumeric') +@array_function_dispatch(_unary_op_dispatcher) def isdecimal(a): """ For each element, return True if there are only decimal @@ -1666,6 +1807,7 @@ def isdecimal(a): return _vec_string(a, bool_, 'isdecimal') +@set_module('numpy') class chararray(ndarray): """ chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0, @@ -1794,18 +1936,16 @@ class chararray(ndarray): >>> charar = np.chararray((3, 3)) >>> charar[:] = 'a' >>> charar - chararray([['a', 'a', 'a'], - ['a', 'a', 'a'], - ['a', 'a', 'a']], - dtype='|S1') + chararray([[b'a', b'a', b'a'], + [b'a', b'a', b'a'], + [b'a', b'a', b'a']], dtype='|S1') >>> charar = np.chararray(charar.shape, itemsize=5) >>> charar[:] = 'abc' >>> charar - chararray([['abc', 'abc', 'abc'], - ['abc', 'abc', 'abc'], - ['abc', 'abc', 'abc']], - dtype='|S5') + chararray([[b'abc', b'abc', b'abc'], + [b'abc', b'abc', b'abc'], + [b'abc', b'abc', b'abc']], dtype='|S5') """ def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None, |