summaryrefslogtreecommitdiff
path: root/numpy/core/defchararray.py
diff options
context:
space:
mode:
authorEric Wieser <wieser.eric@gmail.com>2019-04-16 01:32:35 -0700
committerGitHub <noreply@github.com>2019-04-16 01:32:35 -0700
commit9af2340580bcbacc06b1079df3e9b8abf90b7657 (patch)
treedd8041d48e8cd9b3cc5ddcdab9e0ba851a0b4a9a /numpy/core/defchararray.py
parent389bd44e32b0eace0d024b126931a0a00d14cffe (diff)
parentcc94f360febdef0e6c4183c50555ba82e60ccff6 (diff)
downloadnumpy-9af2340580bcbacc06b1079df3e9b8abf90b7657.tar.gz
Merge branch 'master' into poly1d-fixes-fixes-fixes-fixes
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r--numpy/core/defchararray.py230
1 files changed, 185 insertions, 45 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index a11b343cb..3fd7d14c4 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -17,16 +17,19 @@ The preferred alias for `defchararray` is `numpy.char`.
"""
from __future__ import division, absolute_import, print_function
+import functools
import sys
from .numerictypes import string_, unicode_, integer, object_, bool_, character
from .numeric import ndarray, compare_chararrays
from .numeric import array as narray
from numpy.core.multiarray import _vec_string
+from numpy.core.overrides import set_module
+from numpy.core import overrides
from numpy.compat import asbytes, long
import numpy
__all__ = [
- 'chararray', 'equal', 'not_equal', 'greater_equal', 'less_equal',
+ 'equal', 'not_equal', 'greater_equal', 'less_equal',
'greater', 'less', 'str_len', 'add', 'multiply', 'mod', 'capitalize',
'center', 'count', 'decode', 'encode', 'endswith', 'expandtabs',
'find', 'index', 'isalnum', 'isalpha', 'isdigit', 'islower', 'isspace',
@@ -47,6 +50,10 @@ else:
_bytes = str
_len = len
+array_function_dispatch = functools.partial(
+ overrides.array_function_dispatch, module='numpy.char')
+
+
def _use_unicode(*args):
"""
Helper function for determining the output type of some string
@@ -95,6 +102,11 @@ def _get_num_chars(a):
return a.itemsize
+def _binary_op_dispatcher(x1, x2):
+ return (x1, x2)
+
+
+@array_function_dispatch(_binary_op_dispatcher)
def equal(x1, x2):
"""
Return (x1 == x2) element-wise.
@@ -119,6 +131,8 @@ def equal(x1, x2):
"""
return compare_chararrays(x1, x2, '==', True)
+
+@array_function_dispatch(_binary_op_dispatcher)
def not_equal(x1, x2):
"""
Return (x1 != x2) element-wise.
@@ -143,6 +157,8 @@ def not_equal(x1, x2):
"""
return compare_chararrays(x1, x2, '!=', True)
+
+@array_function_dispatch(_binary_op_dispatcher)
def greater_equal(x1, x2):
"""
Return (x1 >= x2) element-wise.
@@ -168,6 +184,8 @@ def greater_equal(x1, x2):
"""
return compare_chararrays(x1, x2, '>=', True)
+
+@array_function_dispatch(_binary_op_dispatcher)
def less_equal(x1, x2):
"""
Return (x1 <= x2) element-wise.
@@ -192,6 +210,8 @@ def less_equal(x1, x2):
"""
return compare_chararrays(x1, x2, '<=', True)
+
+@array_function_dispatch(_binary_op_dispatcher)
def greater(x1, x2):
"""
Return (x1 > x2) element-wise.
@@ -216,6 +236,8 @@ def greater(x1, x2):
"""
return compare_chararrays(x1, x2, '>', True)
+
+@array_function_dispatch(_binary_op_dispatcher)
def less(x1, x2):
"""
Return (x1 < x2) element-wise.
@@ -240,6 +262,12 @@ def less(x1, x2):
"""
return compare_chararrays(x1, x2, '<', True)
+
+def _unary_op_dispatcher(a):
+ return (a,)
+
+
+@array_function_dispatch(_unary_op_dispatcher)
def str_len(a):
"""
Return len(a) element-wise.
@@ -259,6 +287,8 @@ def str_len(a):
"""
return _vec_string(a, integer, '__len__')
+
+@array_function_dispatch(_binary_op_dispatcher)
def add(x1, x2):
"""
Return element-wise string concatenation for two arrays of str or unicode.
@@ -285,6 +315,12 @@ def add(x1, x2):
dtype = _use_unicode(arr1, arr2)
return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
+
+def _multiply_dispatcher(a, i):
+ return (a,)
+
+
+@array_function_dispatch(_multiply_dispatcher)
def multiply(a, i):
"""
Return (a * i), that is string multiple concatenation,
@@ -313,6 +349,12 @@ def multiply(a, i):
return _vec_string(
a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
+
+def _mod_dispatcher(a, values):
+ return (a, values)
+
+
+@array_function_dispatch(_mod_dispatcher)
def mod(a, values):
"""
Return (a % i), that is pre-Python 2.6 string formatting
@@ -339,6 +381,8 @@ def mod(a, values):
return _to_string_or_unicode_array(
_vec_string(a, object_, '__mod__', (values,)))
+
+@array_function_dispatch(_unary_op_dispatcher)
def capitalize(a):
"""
Return a copy of `a` with only the first character of each element
@@ -377,6 +421,11 @@ def capitalize(a):
return _vec_string(a_arr, a_arr.dtype, 'capitalize')
+def _center_dispatcher(a, width, fillchar=None):
+ return (a,)
+
+
+@array_function_dispatch(_center_dispatcher)
def center(a, width, fillchar=' '):
"""
Return a copy of `a` with its elements centered in a string of
@@ -413,6 +462,11 @@ def center(a, width, fillchar=' '):
a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+def _count_dispatcher(a, sub, start=None, end=None):
+ return (a,)
+
+
+@array_function_dispatch(_count_dispatcher)
def count(a, sub, start=0, end=None):
"""
Returns an array with the number of non-overlapping occurrences of
@@ -444,8 +498,7 @@ def count(a, sub, start=0, end=None):
--------
>>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
>>> c
- array(['aAaAaA', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
>>> np.char.count(c, 'A')
array([3, 1, 1])
>>> np.char.count(c, 'aA')
@@ -459,6 +512,11 @@ def count(a, sub, start=0, end=None):
return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
+def _code_dispatcher(a, encoding=None, errors=None):
+ return (a,)
+
+
+@array_function_dispatch(_code_dispatcher)
def decode(a, encoding=None, errors=None):
"""
Calls `str.decode` element-wise.
@@ -493,8 +551,7 @@ def decode(a, encoding=None, errors=None):
--------
>>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
>>> c
- array(['aAaAaA', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
>>> np.char.encode(c, encoding='cp037')
array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
'\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
@@ -505,6 +562,7 @@ def decode(a, encoding=None, errors=None):
_vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
+@array_function_dispatch(_code_dispatcher)
def encode(a, encoding=None, errors=None):
"""
Calls `str.encode` element-wise.
@@ -540,6 +598,11 @@ def encode(a, encoding=None, errors=None):
_vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
+def _endswith_dispatcher(a, suffix, start=None, end=None):
+ return (a,)
+
+
+@array_function_dispatch(_endswith_dispatcher)
def endswith(a, suffix, start=0, end=None):
"""
Returns a boolean array which is `True` where the string element
@@ -572,18 +635,22 @@ def endswith(a, suffix, start=0, end=None):
>>> s[0] = 'foo'
>>> s[1] = 'bar'
>>> s
- array(['foo', 'bar'],
- dtype='|S3')
+ array(['foo', 'bar'], dtype='<U3')
>>> np.char.endswith(s, 'ar')
- array([False, True], dtype=bool)
+ array([False, True])
>>> np.char.endswith(s, 'a', start=1, end=2)
- array([False, True], dtype=bool)
+ array([False, True])
"""
return _vec_string(
a, bool_, 'endswith', [suffix, start] + _clean_args(end))
+def _expandtabs_dispatcher(a, tabsize=None):
+ return (a,)
+
+
+@array_function_dispatch(_expandtabs_dispatcher)
def expandtabs(a, tabsize=8):
"""
Return a copy of each string element where all tab characters are
@@ -619,6 +686,7 @@ def expandtabs(a, tabsize=8):
_vec_string(a, object_, 'expandtabs', (tabsize,)))
+@array_function_dispatch(_count_dispatcher)
def find(a, sub, start=0, end=None):
"""
For each element, return the lowest index in the string where
@@ -654,6 +722,7 @@ def find(a, sub, start=0, end=None):
a, integer, 'find', [sub, start] + _clean_args(end))
+@array_function_dispatch(_count_dispatcher)
def index(a, sub, start=0, end=None):
"""
Like `find`, but raises `ValueError` when the substring is not found.
@@ -681,6 +750,8 @@ def index(a, sub, start=0, end=None):
return _vec_string(
a, integer, 'index', [sub, start] + _clean_args(end))
+
+@array_function_dispatch(_unary_op_dispatcher)
def isalnum(a):
"""
Returns true for each element if all characters in the string are
@@ -705,6 +776,8 @@ def isalnum(a):
"""
return _vec_string(a, bool_, 'isalnum')
+
+@array_function_dispatch(_unary_op_dispatcher)
def isalpha(a):
"""
Returns true for each element if all characters in the string are
@@ -729,6 +802,8 @@ def isalpha(a):
"""
return _vec_string(a, bool_, 'isalpha')
+
+@array_function_dispatch(_unary_op_dispatcher)
def isdigit(a):
"""
Returns true for each element if all characters in the string are
@@ -753,6 +828,8 @@ def isdigit(a):
"""
return _vec_string(a, bool_, 'isdigit')
+
+@array_function_dispatch(_unary_op_dispatcher)
def islower(a):
"""
Returns true for each element if all cased characters in the
@@ -778,6 +855,8 @@ def islower(a):
"""
return _vec_string(a, bool_, 'islower')
+
+@array_function_dispatch(_unary_op_dispatcher)
def isspace(a):
"""
Returns true for each element if there are only whitespace
@@ -803,6 +882,8 @@ def isspace(a):
"""
return _vec_string(a, bool_, 'isspace')
+
+@array_function_dispatch(_unary_op_dispatcher)
def istitle(a):
"""
Returns true for each element if the element is a titlecased
@@ -827,6 +908,8 @@ def istitle(a):
"""
return _vec_string(a, bool_, 'istitle')
+
+@array_function_dispatch(_unary_op_dispatcher)
def isupper(a):
"""
Returns true for each element if all cased characters in the
@@ -852,6 +935,12 @@ def isupper(a):
"""
return _vec_string(a, bool_, 'isupper')
+
+def _join_dispatcher(sep, seq):
+ return (sep, seq)
+
+
+@array_function_dispatch(_join_dispatcher)
def join(sep, seq):
"""
Return a string which is the concatenation of the strings in the
@@ -877,6 +966,12 @@ def join(sep, seq):
_vec_string(sep, object_, 'join', (seq,)))
+
+def _just_dispatcher(a, width, fillchar=None):
+ return (a,)
+
+
+@array_function_dispatch(_just_dispatcher)
def ljust(a, width, fillchar=' '):
"""
Return an array with the elements of `a` left-justified in a
@@ -912,6 +1007,7 @@ def ljust(a, width, fillchar=' '):
a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+@array_function_dispatch(_unary_op_dispatcher)
def lower(a):
"""
Return an array with the elements converted to lowercase.
@@ -937,17 +1033,20 @@ def lower(a):
Examples
--------
>>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
- array(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
+ array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
>>> np.char.lower(c)
- array(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
+ array(['a1b c', '1bca', 'bca1'], dtype='<U5')
"""
a_arr = numpy.asarray(a)
return _vec_string(a_arr, a_arr.dtype, 'lower')
+def _strip_dispatcher(a, chars=None):
+ return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
def lstrip(a, chars=None):
"""
For each element in `a`, return a copy with the leading characters
@@ -980,23 +1079,20 @@ def lstrip(a, chars=None):
--------
>>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
>>> c
- array(['aAaAaA', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
The 'a' variable is unstripped from c[1] because whitespace leading.
>>> np.char.lstrip(c, 'a')
- array(['AaAaA', ' aA ', 'bBABba'],
- dtype='|S7')
+ array(['AaAaA', ' aA ', 'bBABba'], dtype='<U7')
>>> np.char.lstrip(c, 'A') # leaves c unchanged
- array(['aAaAaA', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
>>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
- ... # XXX: is this a regression? this line now returns False
+ ... # XXX: is this a regression? This used to return True
... # np.char.lstrip(c,'') does not modify c at all.
- True
+ False
>>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
True
@@ -1005,6 +1101,11 @@ def lstrip(a, chars=None):
return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
+def _partition_dispatcher(a, sep):
+ return (a,)
+
+
+@array_function_dispatch(_partition_dispatcher)
def partition(a, sep):
"""
Partition each element in `a` around `sep`.
@@ -1040,6 +1141,11 @@ def partition(a, sep):
_vec_string(a, object_, 'partition', (sep,)))
+def _replace_dispatcher(a, old, new, count=None):
+ return (a,)
+
+
+@array_function_dispatch(_replace_dispatcher)
def replace(a, old, new, count=None):
"""
For each element in `a`, return a copy of the string with all
@@ -1072,6 +1178,7 @@ def replace(a, old, new, count=None):
a, object_, 'replace', [old, new] + _clean_args(count)))
+@array_function_dispatch(_count_dispatcher)
def rfind(a, sub, start=0, end=None):
"""
For each element in `a`, return the highest index in the string
@@ -1104,6 +1211,7 @@ def rfind(a, sub, start=0, end=None):
a, integer, 'rfind', [sub, start] + _clean_args(end))
+@array_function_dispatch(_count_dispatcher)
def rindex(a, sub, start=0, end=None):
"""
Like `rfind`, but raises `ValueError` when the substring `sub` is
@@ -1133,6 +1241,7 @@ def rindex(a, sub, start=0, end=None):
a, integer, 'rindex', [sub, start] + _clean_args(end))
+@array_function_dispatch(_just_dispatcher)
def rjust(a, width, fillchar=' '):
"""
Return an array with the elements of `a` right-justified in a
@@ -1168,6 +1277,7 @@ def rjust(a, width, fillchar=' '):
a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
+@array_function_dispatch(_partition_dispatcher)
def rpartition(a, sep):
"""
Partition (split) each element around the right-most separator.
@@ -1203,6 +1313,11 @@ def rpartition(a, sep):
_vec_string(a, object_, 'rpartition', (sep,)))
+def _split_dispatcher(a, sep=None, maxsplit=None):
+ return (a,)
+
+
+@array_function_dispatch(_split_dispatcher)
def rsplit(a, sep=None, maxsplit=None):
"""
For each element in `a`, return a list of the words in the
@@ -1240,6 +1355,11 @@ def rsplit(a, sep=None, maxsplit=None):
a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
+def _strip_dispatcher(a, chars=None):
+ return (a,)
+
+
+@array_function_dispatch(_strip_dispatcher)
def rstrip(a, chars=None):
"""
For each element in `a`, return a copy with the trailing
@@ -1272,10 +1392,10 @@ def rstrip(a, chars=None):
>>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
array(['aAaAaA', 'abBABba'],
dtype='|S7')
- >>> np.char.rstrip(c, 'a')
+ >>> np.char.rstrip(c, b'a')
array(['aAaAaA', 'abBABb'],
dtype='|S7')
- >>> np.char.rstrip(c, 'A')
+ >>> np.char.rstrip(c, b'A')
array(['aAaAa', 'abBABba'],
dtype='|S7')
@@ -1284,12 +1404,13 @@ def rstrip(a, chars=None):
return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
+@array_function_dispatch(_split_dispatcher)
def split(a, sep=None, maxsplit=None):
"""
For each element in `a`, return a list of the words in the
string, using `sep` as the delimiter string.
- Calls `str.rsplit` element-wise.
+ Calls `str.split` element-wise.
Parameters
----------
@@ -1318,6 +1439,11 @@ def split(a, sep=None, maxsplit=None):
a, object_, 'split', [sep] + _clean_args(maxsplit))
+def _splitlines_dispatcher(a, keepends=None):
+ return (a,)
+
+
+@array_function_dispatch(_splitlines_dispatcher)
def splitlines(a, keepends=None):
"""
For each element in `a`, return a list of the lines in the
@@ -1347,6 +1473,11 @@ def splitlines(a, keepends=None):
a, object_, 'splitlines', _clean_args(keepends))
+def _startswith_dispatcher(a, prefix, start=None, end=None):
+ return (a,)
+
+
+@array_function_dispatch(_startswith_dispatcher)
def startswith(a, prefix, start=0, end=None):
"""
Returns a boolean array which is `True` where the string element
@@ -1378,12 +1509,13 @@ def startswith(a, prefix, start=0, end=None):
a, bool_, 'startswith', [prefix, start] + _clean_args(end))
+@array_function_dispatch(_strip_dispatcher)
def strip(a, chars=None):
"""
For each element in `a`, return a copy with the leading and
trailing characters removed.
- Calls `str.rstrip` element-wise.
+ Calls `str.strip` element-wise.
Parameters
----------
@@ -1409,23 +1541,20 @@ def strip(a, chars=None):
--------
>>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
>>> c
- array(['aAaAaA', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', ' aA ', 'abBABba'], dtype='<U7')
>>> np.char.strip(c)
- array(['aAaAaA', 'aA', 'abBABba'],
- dtype='|S7')
+ array(['aAaAaA', 'aA', 'abBABba'], dtype='<U7')
>>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
- array(['AaAaA', ' aA ', 'bBABb'],
- dtype='|S7')
+ array(['AaAaA', ' aA ', 'bBABb'], dtype='<U7')
>>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
- array(['aAaAa', ' aA ', 'abBABba'],
- dtype='|S7')
+ array(['aAaAa', ' aA ', 'abBABba'], dtype='<U7')
"""
a_arr = numpy.asarray(a)
return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
+@array_function_dispatch(_unary_op_dispatcher)
def swapcase(a):
"""
Return element-wise a copy of the string with
@@ -1463,6 +1592,7 @@ def swapcase(a):
return _vec_string(a_arr, a_arr.dtype, 'swapcase')
+@array_function_dispatch(_unary_op_dispatcher)
def title(a):
"""
Return element-wise title cased version of string or unicode.
@@ -1502,6 +1632,11 @@ def title(a):
return _vec_string(a_arr, a_arr.dtype, 'title')
+def _translate_dispatcher(a, table, deletechars=None):
+ return (a,)
+
+
+@array_function_dispatch(_translate_dispatcher)
def translate(a, table, deletechars=None):
"""
For each element in `a`, return a copy of the string where all
@@ -1538,6 +1673,7 @@ def translate(a, table, deletechars=None):
a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
+@array_function_dispatch(_unary_op_dispatcher)
def upper(a):
"""
Return an array with the elements converted to uppercase.
@@ -1563,17 +1699,20 @@ def upper(a):
Examples
--------
>>> c = np.array(['a1b c', '1bca', 'bca1']); c
- array(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
+ array(['a1b c', '1bca', 'bca1'], dtype='<U5')
>>> np.char.upper(c)
- array(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
+ array(['A1B C', '1BCA', 'BCA1'], dtype='<U5')
"""
a_arr = numpy.asarray(a)
return _vec_string(a_arr, a_arr.dtype, 'upper')
+def _zfill_dispatcher(a, width):
+ return (a,)
+
+
+@array_function_dispatch(_zfill_dispatcher)
def zfill(a, width):
"""
Return the numeric string left-filled with zeros
@@ -1604,6 +1743,7 @@ def zfill(a, width):
a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+@array_function_dispatch(_unary_op_dispatcher)
def isnumeric(a):
"""
For each element, return True if there are only numeric
@@ -1635,6 +1775,7 @@ def isnumeric(a):
return _vec_string(a, bool_, 'isnumeric')
+@array_function_dispatch(_unary_op_dispatcher)
def isdecimal(a):
"""
For each element, return True if there are only decimal
@@ -1666,6 +1807,7 @@ def isdecimal(a):
return _vec_string(a, bool_, 'isdecimal')
+@set_module('numpy')
class chararray(ndarray):
"""
chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
@@ -1794,18 +1936,16 @@ class chararray(ndarray):
>>> charar = np.chararray((3, 3))
>>> charar[:] = 'a'
>>> charar
- chararray([['a', 'a', 'a'],
- ['a', 'a', 'a'],
- ['a', 'a', 'a']],
- dtype='|S1')
+ chararray([[b'a', b'a', b'a'],
+ [b'a', b'a', b'a'],
+ [b'a', b'a', b'a']], dtype='|S1')
>>> charar = np.chararray(charar.shape, itemsize=5)
>>> charar[:] = 'abc'
>>> charar
- chararray([['abc', 'abc', 'abc'],
- ['abc', 'abc', 'abc'],
- ['abc', 'abc', 'abc']],
- dtype='|S5')
+ chararray([[b'abc', b'abc', b'abc'],
+ [b'abc', b'abc', b'abc'],
+ [b'abc', b'abc', b'abc']], dtype='|S5')
"""
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,