summaryrefslogtreecommitdiff
path: root/numpy/core/defchararray.py
diff options
context:
space:
mode:
authormdroe <mdroe@localhost>2009-10-12 15:24:43 +0000
committermdroe <mdroe@localhost>2009-10-12 15:24:43 +0000
commit7deb4d5db4282e1e1c860dda268a21c6979e9725 (patch)
tree6dbd07a1dfcec326ce90228c17cbdd337f4f1621 /numpy/core/defchararray.py
parent66c62a8aeed8c963e2ef108685d5681e1e445f3d (diff)
downloadnumpy-7deb4d5db4282e1e1c860dda268a21c6979e9725.tar.gz
Many improvements to chararray. Fixes Trac bugs 1199, 856, 855, 1231, 1235, 1240, 1241. docstrings, full unit-test coverage, C-based vectorized string operations.
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r--numpy/core/defchararray.py2802
1 files changed, 2261 insertions, 541 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 580bde59a..b59f27065 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -1,75 +1,1672 @@
"""
-Module for character arrays.
+This module contains a set of functions for vectorized string
+operations and methods.
.. note::
- The chararray module exists for backwards compatibility with Numarray,
- it is not recommended for new development. If one needs arrays of
- strings, use arrays of `dtype` object.
+ The chararray module exists for backwards compatibility with
+ Numarray, it is not recommended for new development. If one needs
+ arrays of strings, use arrays of `dtype` `object_`, `string_` or
+ `unicode_`.
-The preferred alias for `defchararray` is `numpy.char`.
+Methods will only be available if the corresponding str method is
+available in your version of Python.
+The preferred alias for `defchararray` is `numpy.char`.
"""
+
import sys
-from numerictypes import string_, unicode_, integer, object_
-from numeric import ndarray, broadcast, empty, compare_chararrays
+from numerictypes import string_, unicode_, integer, object_, bool_, character
+from numeric import ndarray, compare_chararrays
from numeric import array as narray
+from numpy.core.multiarray import _vec_string
+import numpy
__all__ = ['chararray']
_globalvar = 0
_unicode = unicode
+_len = len
+
+def _use_unicode(*args):
+ """
+ Helper function for determining the output type of some string
+ operations.
+
+ For an operation on two ndarrays, if at least one is unicode, the
+ result should be unicode.
+ """
+ for x in args:
+ if (isinstance(x, _unicode)
+ or issubclass(numpy.asarray(x).dtype.type, unicode_)):
+ return unicode_
+ return string_
+
+def _to_string_or_unicode_array(result):
+ """
+ Helper function to cast a result back into a string or unicode array
+ if an object array must be used as an intermediary.
+ """
+ return numpy.asarray(result.tolist())
+
+def _clean_args(*args):
+ """
+ Helper function for delegating arguments to Python string
+ functions.
+
+ Many of the Python string operations that have optional arguments
+ do not use 'None' to indicate a default value. In these cases,
+ we need to remove all `None` arguments, and those following them.
+ """
+ newargs = []
+ for chk in args:
+ if chk is None:
+ break
+ newargs.append(chk)
+ return newargs
+
+def _get_num_chars(a):
+    """
+    Helper function that returns the number of characters per field in
+    a string or unicode array.  This is to abstract out the fact that
+    for a unicode array this is itemsize / 4.
+
+    Parameters
+    ----------
+    a : ndarray of string_ or unicode_
+
+    Returns
+    -------
+    out : int
+        Number of characters per element.
+    """
+    if issubclass(a.dtype.type, unicode_):
+        # Floor division keeps the result an integer even where `/`
+        # means true division (e.g. ``from __future__ import division``);
+        # the value is used as a string width and must not be a float.
+        return a.itemsize // 4
+    return a.itemsize
+
+
+def equal(x1, x2):
+ """
+ Return (x1 == x2) element-wise.
+
+ Unlike `numpy.equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ not_equal, greater_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '==', True)
+
+def not_equal(x1, x2):
+ """
+ Return (x1 != x2) element-wise.
+
+ Unlike `numpy.not_equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, greater_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '!=', True)
+
+def greater_equal(x1, x2):
+ """
+ Return (x1 >= x2) element-wise.
+
+ Unlike `numpy.greater_equal`, this comparison is performed by
+ first stripping whitespace characters from the end of the string.
+ This behavior is provided for backward-compatibility with
+ numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '>=', True)
+
+def less_equal(x1, x2):
+ """
+ Return (x1 <= x2) element-wise.
+
+ Unlike `numpy.less_equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '<=', True)
+
+def greater(x1, x2):
+ """
+ Return (x1 > x2) element-wise.
+
+ Unlike `numpy.greater`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, less_equal, less
+ """
+ return compare_chararrays(x1, x2, '>', True)
+
+def less(x1, x2):
+ """
+ Return (x1 < x2) element-wise.
+
+ Unlike `numpy.less`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, less_equal, greater
+ """
+ return compare_chararrays(x1, x2, '<', True)
+
+def str_len(a):
+ """
+ Return len(a) element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers
+
+ See also
+ --------
+ __builtin__.len
+ """
+ return _vec_string(a, integer, '__len__')
+
+def add(x1, x2):
+ """
+ Return (x1 + x2), that is string concatenation, element-wise for a
+ pair of array_likes of string_ or unicode_.
+
+ Parameters
+ ----------
+ x1 : array_like of string_ or unicode_
+ x2 : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+ """
+ arr1 = numpy.asarray(x1)
+ arr2 = numpy.asarray(x2)
+ out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
+ dtype = _use_unicode(arr1, arr2)
+ return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
+
+def multiply(a, i):
+    """
+    Return (a * i), that is string multiple concatenation,
+    element-wise.
+
+    Values in `i` of less than 0 are treated as 0 (which yields an
+    empty string).
+
+    Parameters
+    ----------
+    a : array_like of string_ or unicode_
+    i : array_like of integers
+
+    Returns
+    -------
+    out : ndarray
+        Output array of str or unicode, depending on input types
+
+    Raises
+    ------
+    ValueError
+        If `i` does not have an integer dtype.
+    """
+    a_arr = numpy.asarray(a)
+    i_arr = numpy.asarray(i)
+    if not issubclass(i_arr.dtype.type, integer):
+        # Call form of raise: valid syntax on every Python version,
+        # unlike the ``raise E, msg`` statement form.
+        raise ValueError("Can only multiply by integers")
+    # Size the output field for the largest repeat count; negative
+    # counts are treated as 0, so the width is never negative.
+    out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0)
+    return _vec_string(
+        a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
+
+def mod(a, values):
+ """
+ Return (a % i), that is pre-Python 2.6 string formatting
+ (interpolation), element-wise for a pair of array_likes of string_
+ or unicode_.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ values : array_like of values
+ These values will be element-wise interpolated into the string.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.__mod__
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, '__mod__', (values,)))
+
+def capitalize(a):
+ """
+ Return a copy of `a` with only the first character of each element
+ capitalized.
-# special sub-class for character arrays (string_ and unicode_)
-# This adds + and * operations and methods of str and unicode types
-# which operate on an element-by-element basis
+ Calls `str.capitalize` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.capitalize
+
+ Examples
+ --------
+ >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
+ array(['a1b2', '1b2a', 'b2a1', '2a1b'],
+ dtype='|S4')
+ >>> np.char.capitalize(c)
+ array(['A1b2', '1b2a', 'B2a1', '2a1b'],
+ dtype='|S4')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'capitalize')
+
+if sys.version_info >= (2, 4):
+ def center(a, width, fillchar=' '):
+ """
+ Return a copy of `a` with its elements centered in a string of
+ length `width`.
+
+ Calls `str.center` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+ fillchar : str or unicode, optional
+ The padding character to use (default is space).
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.center
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+else:
+ def center(a, width):
+ """
+ Return an array with the elements of `a` centered in a string
+ of length width.
+
+ Calls `str.center` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.center
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'center', (width_arr,))
+
+def count(a, sub, start=0, end=None):
+ """
+ Returns an array with the number of non-overlapping occurrences of
+ substring `sub` in the range [`start`, `end`].
+
+ Calls `str.count` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ The substring to search for
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as slice
+ notation to specify the range in which to count.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers.
+
+ See also
+ --------
+ str.count
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.count(c, 'A')
+ array([3, 1, 1])
+ >>> np.char.count(c, 'aA')
+ array([3, 1, 0])
+ >>> np.char.count(c, 'A', start=1, end=4)
+ array([2, 1, 1])
+ >>> np.char.count(c, 'A', start=1, end=3)
+ array([1, 0, 0])
+ """
+ return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
+
+def decode(a, encoding=None, errors=None):
+ """
+ Calls `str.decode` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ encoding : str, optional
+ The name of an encoding
+ errors : str, optional
+ Specifies how to handle encoding errors
+
+ Returns
+ -------
+ out : ndarray
+
+ Notes
+ -----
+ The type of the result will depend on the encoding specified.
+
+ See also
+ --------
+ str.decode
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.encode(c, encoding='cp037')
+ array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
+ '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
+ dtype='|S7')
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
+
+def encode(a, encoding=None, errors=None):
+ """
+ Calls `str.encode` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ encoding : str, optional
+ The name of an encoding
+ errors : str, optional
+ Specifies how to handle encoding errors
+
+ Returns
+ -------
+ out : ndarray
+
+ Notes
+ -----
+ The type of the result will depend on the encoding specified.
+
+ See also
+ --------
+ str.encode
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
+
+def endswith(a, suffix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `a` ends with `suffix`, otherwise `False`.
+
+ Calls `str.endswith` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ suffix : str
+ start, end : int, optional
+ With optional `start`, test beginning at that position. With
+ optional `end`, stop comparing at that position.
+
+ Returns
+ -------
+ out : ndarray
+ Outputs an array of booleans.
+
+ See also
+ --------
+ str.endswith
+
+ Examples
+ --------
+ >>> s = np.array(['foo', 'bar'])
+ >>> s[0] = 'foo'
+ >>> s[1] = 'bar'
+ >>> s
+ array(['foo', 'bar'],
+ dtype='|S3')
+ >>> np.char.endswith(s, 'ar')
+ array([False, True], dtype=bool)
+ >>> np.char.endswith(s, 'a', start=1, end=2)
+ array([False, True], dtype=bool)
+ """
+ return _vec_string(
+ a, bool_, 'endswith', [suffix, start] + _clean_args(end))
+
+def expandtabs(a, tabsize=8):
+ """
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces.
+
+ Calls `str.expandtabs` element-wise.
+
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces, depending on the current column
+ and the given `tabsize`. The column number is reset to zero after
+ each newline occurring in the string. If `tabsize` is not given, a
+ tab size of 8 characters is assumed. This doesn’t understand other
+ non-printing characters or escape sequences.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ tabsize : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.expandtabs
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'expandtabs', (tabsize,)))
+
+def find(a, sub, start=0, end=None):
+ """
+ For each element, return the lowest index in the string where
+ substring `sub` is found.
+
+ Calls `str.find` element-wise.
+
+ For each element, return the lowest index in the string where
+ substring `sub` is found, such that `sub` is contained in the
+ range [`start`, `end`].
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as in
+ slice notation.
+
+ Returns
+ -------
+ out : {ndarray, integer}
+ Output array of integers. Returns -1 if `sub` is not found.
+
+ See also
+ --------
+ str.find
+ """
+ return _vec_string(
+ a, integer, 'find', [sub, start] + _clean_args(end))
+
+# The version must be compared against a tuple: ``(2.6)`` is merely the
+# float 2.6, which does not give a meaningful comparison with
+# sys.version_info.
+if sys.version_info >= (2, 6):
+    def format(a, *args, **kwargs):
+        """
+        Placeholder for an element-wise `str.format`.
+
+        Raises
+        ------
+        NotImplementedError
+            Always; _vec_string doesn't support kwargs at present.
+        """
+        raise NotImplementedError
+
+def index(a, sub, start=0, end=None):
+ """
+ Like `find`, but raises `ValueError` when the substring is not found.
+
+ Calls `str.index` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ start, end : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers. Raises `ValueError` if `sub` is not found.
+
+ See also
+ --------
+ find, str.index
+ """
+ return _vec_string(
+ a, integer, 'index', [sub, start] + _clean_args(end))
+
+def isalnum(a):
+ """
+ Returns true for each element if all characters in the string are
+ alphanumeric and there is at least one character, false otherwise.
+
+ Calls `str.isalnum` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isalnum
+ """
+ return _vec_string(a, bool_, 'isalnum')
+
+def isalpha(a):
+ """
+ Returns true for each element if all characters in the string are
+ alphabetic and there is at least one character, false otherwise.
+
+ Calls `str.isalpha` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isalpha
+ """
+ return _vec_string(a, bool_, 'isalpha')
+
+def isdigit(a):
+ """
+ Returns true for each element if all characters in the string are
+ digits and there is at least one character, false otherwise.
+
+ Calls `str.isdigit` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isdigit
+ """
+ return _vec_string(a, bool_, 'isdigit')
+
+def islower(a):
+ """
+ Returns true for each element if all cased characters in the
+ string are lowercase and there is at least one cased character,
+ false otherwise.
+
+ Calls `str.islower` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.islower
+ """
+ return _vec_string(a, bool_, 'islower')
+
+def isspace(a):
+ """
+ Returns true for each element if there are only whitespace
+ characters in the string and there is at least one character,
+ false otherwise.
+
+ Calls `str.isspace` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isspace
+ """
+ return _vec_string(a, bool_, 'isspace')
+
+def istitle(a):
+ """
+ Returns true for each element if the element is a titlecased
+ string and there is at least one character, false otherwise.
+
+ Call `str.istitle` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.istitle
+ """
+ return _vec_string(a, bool_, 'istitle')
+
+def isupper(a):
+ """
+ Returns true for each element if all cased characters in the
+ string are uppercase and there is at least one cased character, false
+ otherwise.
+
+ Call `str.isupper` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isupper
+ """
+ return _vec_string(a, bool_, 'isupper')
+
+def join(sep, seq):
+ """
+ Return a string which is the concatenation of the strings in the
+ sequence `seq`.
+
+ Calls `str.join` element-wise.
+
+ Parameters
+ ----------
+ sep : array_like of string_ or unicode_
+ seq : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.join
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(sep, object_, 'join', (seq,)))
+
+if sys.version_info >= (2, 4):
+ def ljust(a, width, fillchar=' '):
+ """
+ Return an array with the elements of `a` left-justified in a
+ string of length `width`.
+
+ Calls `str.ljust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+ fillchar : str or unicode, optional
+ The character to use for padding
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.ljust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+else:
+ def ljust(a, width):
+ """
+ Return an array with the elements of `a` left-justified in a
+ string of length `width`.
+
+ Calls `str.ljust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.ljust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr,))
+
+def lower(a):
+ """
+ Return an array with the elements of `a` converted to lowercase.
+
+ Call `str.lower` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.lower
+
+ Examples
+ --------
+ >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
+ array(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+ >>> np.char.lower(c)
+ array(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'lower')
+
+def lstrip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the leading characters
+ removed.
+
+ Calls `str.lstrip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a prefix; rather, all combinations of its values are
+ stripped.
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.lstrip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.lstrip(c, 'a') # 'a' unstripped from c[1] because whitespace leading
+ array(['AaAaA', ' aA', 'bBABba'],
+ dtype='|S6')
+ >>> np.char.lstrip(c, 'A') # leaves c unchanged
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
+ True
+ >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
+ True
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
+
+if sys.version_info >= (2, 5):
+ def partition(a, sep):
+ """
+ Partition each element in `a` around `sep`.
+
+ Calls `str.partition` element-wise.
+
+ For each element in `a`, split the element at the first
+ occurrence of `sep`, and return 3 strings containing the part
+ before the separator, the separator itself, and the part after
+ the separator. If the separator is not found, return 3 strings
+ containing the string itself, followed by two empty strings.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sep : str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string or unicode, depending on input
+ type. The output array will have an extra dimension with
+ 3 elements per input element.
+
+ See also
+ --------
+ str.partition
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'partition', (sep,)))
+
+def replace(a, old, new, count=None):
+ """
+ For each element in `a`, return a copy of the string with all
+ occurrences of substring `old` replaced by `new`.
+
+ Calls `str.replace` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ old, new : str or unicode
+ count : int, optional
+ If the optional argument `count` is given, only the first
+ `count` occurrences are replaced.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.replace
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(
+ a, object_, 'replace', [old, new] +_clean_args(count)))
+
+def rfind(a, sub, start=0, end=None):
+ """
+ For each element in `a`, return the highest index in the string
+ where substring `sub` is found, such that `sub` is contained
+ within [`start`, `end`].
+
+ Calls `str.rfind` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sub : str or unicode
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as in
+ slice notation.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers. Return -1 on failure.
+
+ See also
+ --------
+ str.rfind
+ """
+ return _vec_string(
+ a, integer, 'rfind', [sub, start] + _clean_args(end))
+
+def rindex(a, sub, start=0, end=None):
+ """
+ Like `rfind`, but raises `ValueError` when the substring `sub` is
+ not found.
+
+ Calls `str.rindex` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sub : str or unicode
+ start, end : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers.
+
+ See also
+ --------
+ rfind, str.rindex
+ """
+ return _vec_string(
+ a, integer, 'rindex', [sub, start] + _clean_args(end))
+
+if sys.version_info >= (2, 4):
+    def rjust(a, width, fillchar=' '):
+        """
+        Return an array with the elements of `a` right-justified in a
+        string of length `width`.
+
+        Calls `str.rjust` element-wise.
+
+        Parameters
+        ----------
+        a : array_like of string_ or unicode_
+        width : int
+            The length of the resulting strings
+        fillchar : str or unicode, optional
+            The character to use for padding
+
+        Returns
+        -------
+        out : ndarray
+            Output array of str or unicode, depending on input type
+
+        See also
+        --------
+        str.rjust
+        """
+        a_arr = numpy.asarray(a)
+        width_arr = numpy.asarray(width)
+        # The output field must be wide enough for the largest width.
+        size = long(numpy.max(width_arr.flat))
+        return _vec_string(
+            a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
+else:
+    def rjust(a, width):
+        """
+        Return an array with the elements of `a` right-justified in a
+        string of length `width`.
+
+        Calls `str.rjust` element-wise.
+
+        Parameters
+        ----------
+        a : array_like of string_ or unicode_
+        width : int
+            The length of the resulting strings
+
+        Returns
+        -------
+        out : ndarray
+            Output array of str or unicode, depending on input type
+
+        See also
+        --------
+        str.rjust
+        """
+        a_arr = numpy.asarray(a)
+        width_arr = numpy.asarray(width)
+        # The output field must be wide enough for the largest width.
+        size = long(numpy.max(width_arr.flat))
+        # Pass the converted array, matching the fillchar variant above
+        # and the center/ljust fallbacks.
+        return _vec_string(
+            a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr,))
+
+if sys.version_info >= (2, 5):
+ def rpartition(a, sep):
+ """
+ Partition each element in `a` around `sep`.
+
+ Calls `str.rpartition` element-wise.
+
+ For each element in `a`, split the element at the last
+ occurrence of `sep`, and return 3 strings containing the part
+ before the separator, the separator itself, and the part after
+ the separator. If the separator is not found, return 3 strings
+ containing two empty strings, followed by the string itself.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sep : str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string or unicode, depending on input
+ type. The output array will have an extra dimension with
+ 3 elements per input element.
+
+ See also
+ --------
+ str.rpartition
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'rpartition', (sep,)))
+
+if sys.version_info >= (2, 4):
+ def rsplit(a, sep=None, maxsplit=None):
+ """
+ For each element in `a`, return a list of the words in the
+ string, using `sep` as the delimiter string.
+
+ Calls `str.rsplit` element-wise.
+
+ Except for splitting from the right, `rsplit`
+ behaves like `split`.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sep : str or unicode, optional
+ If `sep` is not specified or `None`, any whitespace string
+ is a separator.
+ maxsplit : int, optional
+ If `maxsplit` is given, at most `maxsplit` splits are done,
+ the rightmost ones.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.rsplit, split
+ """
+ # This will return an array of lists of different sizes, so we
+ # leave it as an object array
+ return _vec_string(
+ a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
+
+def rstrip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the trailing
+ characters removed.
+
+ Calls `str.rstrip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a suffix; rather, all combinations of its values are
+ stripped.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.rstrip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
+ array(['aAaAaA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.rstrip(c, 'a')
+ array(['aAaAaA', 'abBABb'],
+ dtype='|S6')
+ >>> np.char.rstrip(c, 'A')
+ array(['aAaAa', 'abBABba'],
+ dtype='|S7')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
+
+def split(a, sep=None, maxsplit=None):
+ """
+ For each element in `a`, return a list of the words in the
+ string, using `sep` as the delimiter string.
+
+ Calls `str.split` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sep : str or unicode, optional
+ If `sep` is not specified or `None`, any whitespace string is a
+ separator.
+ maxsplit : int, optional
+ If `maxsplit` is given, at most `maxsplit` splits are done.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.split, rsplit
+ """
+ # This will return an array of lists of different sizes, so we
+ # leave it as an object array
+ return _vec_string(
+ a, object_, 'split', [sep] + _clean_args(maxsplit))
+
+def splitlines(a, keepends=None):
+ """
+ For each element in `a`, return a list of the lines in the
+ element, breaking at line boundaries.
+
+ Calls `str.splitlines` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ keepends : bool, optional
+ Line breaks are not included in the resulting list unless
+ keepends is given and true.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.splitlines
+ """
+ return _vec_string(
+ a, object_, 'splitlines', _clean_args(keepends))
+
+def startswith(a, prefix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `a` starts with `prefix`, otherwise `False`.
+
+ Calls `str.startswith` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ prefix : str
+ start : int, optional
+ end : int, optional
+ With optional `start`, test beginning at that position. With
+ optional `end`, stop comparing at that position.
+
+ Returns
+ -------
+ out : ndarray
+ Array of booleans
+
+ See also
+ --------
+ str.startswith
+ """
+ return _vec_string(
+ a, bool_, 'startswith', [prefix, start] + _clean_args(end))
+
+def strip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the leading and
+ trailing characters removed.
+
+ Calls `str.strip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a prefix or suffix; rather, all combinations of its
+ values are stripped.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.strip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.strip(c)
+ array(['aAaAaA', 'aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
+ array(['AaAaA', ' aA', 'bBABb'],
+ dtype='|S6')
+ >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
+ array(['aAaAa', ' aA', 'abBABba'],
+ dtype='|S7')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
+
+def swapcase(a):
+ """
+ For each element in `a`, return a copy of the string with
+ uppercase characters converted to lowercase and vice versa.
+
+ Calls `str.swapcase` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.swapcase
+
+ Examples
+ --------
+ >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
+ array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
+ dtype='|S5')
+ >>> np.char.swapcase(c)
+ array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'swapcase')
+
+def title(a):
+ """
+ For each element in `a`, return a titlecased version of the
+ string: words start with uppercase characters, all remaining cased
+ characters are lowercase.
+
+ Calls `str.title` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.title
+
+ Examples
+ --------
+ >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
+ array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
+ dtype='|S5')
+ >>> np.char.title(c)
+ array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'title')
+
+def translate(a, table, deletechars=None):
+ """
+ For each element in `a`, return a copy of the string where all
+ characters occurring in the optional argument `deletechars` are
+ removed, and the remaining characters have been mapped through the
+ given translation table.
+
+ Calls `str.translate` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ table : str of length 256
+ deletechars : str
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.translate
+ """
+ a_arr = numpy.asarray(a)
+ if issubclass(a_arr.dtype.type, unicode_):
+ return _vec_string(
+ a_arr, a_arr.dtype, 'translate', (table,))
+ else:
+ return _vec_string(
+ a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
+
+def upper(a):
+ """
+ Return an array with the elements of `a` converted to uppercase.
+
+ Calls `str.upper` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.upper
+
+ Examples
+ --------
+ >>> c = np.array(['a1b c', '1bca', 'bca1']); c
+ array(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+ >>> np.char.upper(c)
+ array(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'upper')
+
+def zfill(a, width):
+ """
+ Return the numeric string left-filled with zeros in a string of
+ length `width`.
+
+ Calls `str.zfill` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ width : int
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.zfill
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+
+def isnumeric(a):
+ """
+ For each element in `a`, return True if there are only numeric
+ characters in the element.
+
+ Calls `unicode.isnumeric` element-wise.
+
+ Numeric characters include digit characters, and all characters
+ that have the Unicode numeric value property, e.g. ``U+2155,
+ VULGAR FRACTION ONE FIFTH``.
+
+ Parameters
+ ----------
+ a : array-like of unicode
+
+ Returns
+ -------
+ out : ndarray
+ Array of booleans
+
+ See also
+ --------
+ unicode.isnumeric
+ """
+ if _use_unicode(a) != unicode_:
+ raise TypeError, "isnumeric is only available for Unicode strings and arrays"
+ return _vec_string(a, bool_, 'isnumeric')
+
+def isdecimal(a):
+ """
+ For each element in `a`, return True if there are only decimal
+ characters in the element.
+
+ Calls `unicode.isdecimal` element-wise.
+
+ Decimal characters include digit characters, and all characters
+ that can be used to form decimal-radix numbers,
+ e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
+
+ Parameters
+ ----------
+ a : array-like of unicode
+
+ Returns
+ -------
+ out : ndarray
+ Array of booleans
+
+ See also
+ --------
+ unicode.isdecimal
+ """
+ if _use_unicode(a) != unicode_:
+ raise TypeError, "isnumeric is only available for Unicode strings and arrays"
+ return _vec_string(a, bool_, 'isdecimal')
-# It also strips white-space on element retrieval and on
-# comparisons
class chararray(ndarray):
"""
chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
strides=None, order=None)
- An array of fixed size (perhaps unicode) strings.
+ Provides a convenient view on arrays of string and unicode values.
.. note::
- The chararray module exists for backwards compatibility with Numarray,
- it is not recommended for new development. If one needs arrays of
- strings, use arrays of `dtype` object.
+ This class is provided for numarray backward-compatibility.
+ New code (not concerned with numarray compatibility) should use
+ arrays of type object_, string_ or unicode_ and use the free
+ functions in :mod:`numpy.char <numpy.core.defchararray>` for
+ fast vectorized string operations instead.
+
+ Versus a regular Numpy array of type 'string_' or 'unicode_', this
+ class adds the following functionality:
- Create the array, using `buffer` (with `offset` and `strides`) if it is
- not ``None``. If `buffer` is ``None``, then construct a new array with
- `strides` in "C order," unless both ``len(shape) >= 2`` and
- ``order='Fortran'``, in which case `strides` is in "Fortran order."
+
+ 1) values automatically have whitespace removed from the end
+ when indexed
+
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
+
+ 3) vectorized string operations are provided as methods
+ (e.g. `.endswith`) and infix operators (e.g. +, *, %)
+
+ chararrays should be created using `numpy.char.array
+ <numpy.core.defchararray.array>` or `numpy.char.asarray
+ <numpy.core.defchararray.asarray>`, rather than this constructor
+ directly.
+
+ Create the array, using `buffer` (with `offset` and `strides`) if
+ it is not ``None``. If `buffer` is ``None``, then construct a new
+ array with `strides` in "C order," unless both ``len(shape) >= 2``
+ and ``order='Fortran'``, in which case `strides` is in "Fortran
+ order."
Parameters
----------
shape : tuple
- Shape of the array.
+ Shape of the array.
itemsize : int_like, > 0, optional
- Length of each array element, in number of characters. Default is 1.
+ Length of each array element, in number of characters. Default is 1.
unicode : {True, False}, optional
- Are the array elements of unicode-type (``True``) or string-type
- (``False``, the default).
+ Are the array elements of unicode-type (``True``) or string-type
+ (``False``, the default).
buffer : integer, > 0, optional
- Memory address of the start of the array data. If ``None`` (the
- default), a new array is created.
+ Memory address of the start of the array data. If ``None``
+ (the default), a new array is created.
offset : integer, >= 0, optional
- Fixed stride displacement from the beginning of an axis? Default is
- 0.
+ Fixed stride displacement from the beginning of an axis?
+ Default is 0.
strides : array_like(?), optional
- Strides for the array (see `numpy.ndarray.strides` for full
- description), default is ``None``.
+ Strides for the array (see `numpy.ndarray.strides` for full
+ description), default is ``None``.
order : {'C', 'F'}, optional
- The order in which the array data is stored in memory: 'C' -> "row
- major" order (the default), 'F' -> "column major" (Fortran) order
+ The order in which the array data is stored in memory: 'C' ->
+ "row major" order (the default), 'F' -> "column major"
+ (Fortran) order.
Examples
--------
@@ -87,7 +1684,6 @@ class chararray(ndarray):
['abc', 'abc', 'abc'],
['abc', 'abc', 'abc']],
dtype='|S5')
-
"""
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
offset=0, strides=None, order='C'):
@@ -98,6 +1694,11 @@ class chararray(ndarray):
else:
dtype = string_
+ # force itemsize to be a Python long, since using Numpy integer
+ # types results in itemsize.itemsize being used as the size of
+ # strings in the new array.
+ itemsize = long(itemsize)
+
_globalvar = 1
if buffer is None:
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
@@ -117,87 +1718,134 @@ class chararray(ndarray):
def __getitem__(self, obj):
val = ndarray.__getitem__(self, obj)
- if isinstance(val, (string_, unicode_)):
+ if issubclass(val.dtype.type, character):
temp = val.rstrip()
- if len(temp) == 0:
+ if _len(temp) == 0:
val = ''
else:
val = temp
return val
+ # IMPLEMENTATION NOTE: Most of the methods of this class are
+ # direct delegations to the free functions in this module.
+ # However, those that return an array of strings should instead
+ # return a chararray, so some extra wrapping is required.
+
def __eq__(self, other):
- return compare_chararrays(self, other, '==', True)
+ """
+ Return (self == other) element-wise.
+
+ See also
+ --------
+ equal
+ """
+ return equal(self, other)
def __ne__(self, other):
- return compare_chararrays(self, other, '!=', True)
+ """
+ Return (self != other) element-wise.
+
+ See also
+ --------
+ not_equal
+ """
+ return not_equal(self, other)
def __ge__(self, other):
- return compare_chararrays(self, other, '>=', True)
+ """
+ Return (self >= other) element-wise.
+
+ See also
+ --------
+ greater_equal
+ """
+ return greater_equal(self, other)
def __le__(self, other):
- return compare_chararrays(self, other, '<=', True)
+ """
+ Return (self <= other) element-wise.
+
+ See also
+ --------
+ less_equal
+ """
+ return less_equal(self, other)
def __gt__(self, other):
- return compare_chararrays(self, other, '>', True)
+ """
+ Return (self > other) element-wise.
+
+ See also
+ --------
+ greater
+ """
+ return greater(self, other)
def __lt__(self, other):
- return compare_chararrays(self, other, '<', True)
+ """
+ Return (self < other) element-wise.
+
+ See also
+ --------
+ less
+ """
+ return less(self, other)
def __add__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- outitem = self.itemsize + arr.itemsize
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = (val[0] + val[1])
- return result
+ """
+ Return (self + other), that is string concatenation,
+ element-wise for a pair of array_likes of string_ or unicode_.
+
+ See also
+ --------
+ add
+ """
+ return asarray(add(self, other))
def __radd__(self, other):
- b = broadcast(other, self)
- outitem = b.iters[0].base.itemsize + \
- b.iters[1].base.itemsize
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = (val[0] + val[1])
- return result
-
- def __mul__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- if not issubclass(arr.dtype.type, integer):
- raise ValueError, "Can only multiply by integers"
- outitem = b.iters[0].base.itemsize * arr.max()
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = val[0]*val[1]
- return result
-
- def __rmul__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- if not issubclass(arr.dtype.type, integer):
- raise ValueError, "Can only multiply by integers"
- outitem = b.iters[0].base.itemsize * arr.max()
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = val[0]*val[1]
- return result
-
- def __mod__(self, other):
- b = broadcast(self, other)
- res = [None]*b.size
- maxsize = -1
- for k,val in enumerate(b):
- newval = val[0] % val[1]
- maxsize = max(len(newval), maxsize)
- res[k] = newval
- newarr = chararray(b.shape, maxsize, self.dtype is unicode_)
- newarr[:] = res
- return newarr
+ """
+ Return (other + self), that is string concatenation,
+ element-wise for a pair of array_likes of string_ or unicode_.
+
+ See also
+ --------
+ add
+ """
+ return asarray(add(numpy.asarray(other), self))
+
+ def __mul__(self, i):
+ """
+ Return (self * i), that is string multiple concatenation,
+ element-wise.
+
+ See also
+ --------
+ multiply
+ """
+ return asarray(multiply(self, i))
+
+ def __rmul__(self, i):
+ """
+ Return (i * self), that is string multiple concatenation,
+ element-wise.
+
+ See also
+ --------
+ multiply
+ """
+ return asarray(multiply(self, i))
+
+ def __mod__(self, i):
+ """
+ Return (self % i), that is pre-Python 2.6 string formatting
+ (interpolation), element-wise for a pair of array_likes of string_
+ or unicode_.
+
+ See also
+ --------
+ mod
+ """
+ return asarray(mod(self, i))
def __rmod__(self, other):
return NotImplemented
@@ -221,587 +1869,659 @@ class chararray(ndarray):
"""
return self.__array__().argsort(axis, kind, order)
-
- def _generalmethod(self, name, myiter):
- res = [None]*myiter.size
- maxsize = -1
- for k, val in enumerate(myiter):
- newval = []
- for chk in val[1:]:
- if not chk or (chk.dtype is object_ and chk.item() is None):
- break
- newval.append(chk)
- newitem = getattr(val[0],name)(*newval)
- maxsize = max(len(newitem), maxsize)
- res[k] = newitem
- newarr = chararray(myiter.shape, maxsize, self.dtype is unicode_)
- newarr[:] = res
- return newarr
-
- def _typedmethod(self, name, myiter, dtype):
- result = empty(myiter.shape, dtype=dtype)
- res = result.flat
- for k, val in enumerate(myiter):
- newval = []
- for chk in val[1:]:
- if not chk or (chk.dtype is object_ and chk.item() is None):
- break
- newval.append(chk)
- this_str = val[0].rstrip('\x00')
- newitem = getattr(this_str,name)(*newval)
- res[k] = newitem
- return result
-
- def _samemethod(self, name):
- result = self.copy()
- res = result.flat
- for k, val in enumerate(self.flat):
- res[k] = getattr(val, name)()
- return result
+ argsort.__doc__ = ndarray.argsort.__doc__
def capitalize(self):
"""
- Capitalize the first character of each array element.
-
- For each element of `self`, if the first character is a letter
- possessing both "upper-case" and "lower-case" forms, and it is
- presently in lower-case, change it to upper-case; otherwise, leave
- it untouched.
-
- Parameters
- ----------
- None
-
- Returns
- -------
- ret : chararray
- `self` with each element "title-cased."
+ Return a copy of `self` with only the first character of each element
+ capitalized.
- Examples
+ See also
--------
- >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4').view(np.chararray); c
- chararray(['a1b2', '1b2a', 'b2a1', '2a1b'],
- dtype='|S4')
- >>> c.capitalize()
- chararray(['A1b2', '1b2a', 'B2a1', '2a1b'],
- dtype='|S4')
-
+ capitalize
"""
- return self._samemethod('capitalize')
+ return asarray(capitalize(self))
- if sys.version[:3] >= '2.4':
+ if sys.version_info >= (2, 4):
def center(self, width, fillchar=' '):
- return self._generalmethod('center',
- broadcast(self, width, fillchar))
- def ljust(self, width, fillchar=' '):
- return self._generalmethod('ljust',
- broadcast(self, width, fillchar))
- def rjust(self, width, fillchar=' '):
- return self._generalmethod('rjust',
- broadcast(self, width, fillchar))
- def rsplit(self, sep=None, maxsplit=None):
- return self._typedmethod('rsplit', broadcast(self, sep, maxsplit),
- object)
+ """
+ Return a copy of `self` with its elements centered in a
+ string of length `width`.
+
+ See also
+ --------
+ center
+ """
+ return asarray(center(self, width, fillchar))
else:
- def ljust(self, width):
- return self._generalmethod('ljust', broadcast(self, width))
- def rjust(self, width):
- return self._generalmethod('rjust', broadcast(self, width))
def center(self, width):
- return self._generalmethod('center', broadcast(self, width))
+ """
+ Return a copy of `self` with its elements centered in a
+ string of length `width`.
- def count(self, sub, start=None, end=None):
- """
- Return the number of occurrences of a sub-string in each array element.
+ See also
+ --------
+ center
+ """
+ return asarray(center(self, width))
- Parameters
- ----------
- sub : string
- The sub-string to count.
- start : int, optional
- The string index at which to start counting in each element.
- end : int, optional
- The string index at which to end counting in each element.
-
- Returns
- -------
- ret : ndarray of ints
- Array whose elements are the number of occurrences of `sub` in each
- element of `self`.
+ def count(self, sub, start=0, end=None):
+ """
+ Returns an array with the number of non-overlapping occurrences of
+ substring `sub` in the range [`start`, `end`].
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.count('A')
- array([3, 1, 1])
- >>> c.count('aA')
- array([3, 1, 0])
- >>> c.count('A', start=1, end=4)
- array([2, 1, 1])
- >>> c.count('A', start=1, end=3)
- array([1, 0, 0])
-
+ count
"""
- return self._typedmethod('count', broadcast(self, sub, start, end), int)
+ return count(self, sub, start, end)
+
- def decode(self,encoding=None,errors=None):
+ def decode(self, encoding=None, errors=None):
"""
- Return elements decoded according to the value of `encoding`.
+ Calls `str.decode` element-wise.
- Parameters
- ----------
- encoding : string, optional
- The encoding to use; for a list of acceptable values, see the
- Python docstring for the package 'encodings'
- error : Python exception object?, optional
- The exception to raise if decoding fails?
+ See also
+ --------
+ decode
+ """
+ return decode(self, encoding, errors)
- Returns
- -------
- ret : chararray
- A view of `self`, suitably decoded.
+ def encode(self, encoding=None, errors=None):
+ """
+ Calls `str.encode` element-wise.
- See Also
+ See also
--------
encode
- encodings
- (package)
-
- Examples
- --------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c = c.encode(encoding='cp037'); c
- chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
- '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
- dtype='|S7')
- >>> c.decode(encoding='cp037')
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
-
"""
- return self._generalmethod('decode', broadcast(self, encoding, errors))
+ return encode(self, encoding, errors)
- def encode(self,encoding=None,errors=None):
+ def endswith(self, suffix, start=0, end=None):
"""
- Return elements encoded according to the value of `encoding`.
-
- Parameters
- ----------
- encoding : string, optional
- The encoding to use; for a list of acceptable values, see the
- Python docstring for `encodings`.
- error : Python exception object, optional
- The exception to raise if encoding fails.
+ Returns a boolean array which is `True` where the string element
+ in `self` ends with `suffix`, otherwise `False`.
- Returns
- -------
- ret : chararray
- A view of `self`, suitably encoded.
-
- See Also
- --------
- decode
-
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.encode(encoding='cp037')
- chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
- '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
- dtype='|S7')
-
+ endswith
"""
- return self._generalmethod('encode', broadcast(self, encoding, errors))
+ return endswith(self, suffix, start, end)
- def endswith(self, suffix, start=None, end=None):
+ def expandtabs(self, tabsize=8):
"""
- Check whether elements end with specified suffix
-
- Given an array of strings, return a new bool array of same shape with
- the result of comparing suffix against each element; each element
- of bool array is ``True`` if element ends with specified suffix and
- ``False`` otherwise.
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces.
- Parameters
- ----------
- suffix : string
- Compare each element in array to this.
- start : int, optional
- For each element, start searching from this position.
- end : int, optional
- For each element, stop comparing at this position.
+ See also
+ --------
+ expandtabs
+ """
+ return asarray(expandtabs(self, tabsize))
- Returns
- -------
- endswith : ndarray
- Output array of bools
+ def find(self, sub, start=0, end=None):
+ """
+ For each element, return the lowest index in the string where
+ substring `sub` is found.
- See Also
+ See also
--------
- count
find
- index
- startswith
-
- Examples
- --------
- >>> s = chararray(3, itemsize=3)
- >>> s[0] = 'foo'
- >>> s[1] = 'bar'
- >>> s
- chararray(['foo', 'bar'],
- dtype='|S3')
- >>> s.endswith('ar')
- array([False, True], dtype=bool)
- >>> s.endswith('a', start=1, end=2)
- array([False, True], dtype=bool)
-
"""
- return self._typedmethod('endswith', broadcast(self, suffix, start, end), bool)
-
- def expandtabs(self, tabsize=None):
- return self._generalmethod('endswith', broadcast(self, tabsize))
-
- def find(self, sub, start=None, end=None):
- return self._typedmethod('find', broadcast(self, sub, start, end), int)
+ return find(self, sub, start, end)
- def index(self, sub, start=None, end=None):
- return self._typedmethod('index', broadcast(self, sub, start, end), int)
+ def index(self, sub, start=0, end=None):
+ """
+ Like `find`, but raises `ValueError` when the substring is not found.
- def _ismethod(self, name):
- result = empty(self.shape, dtype=bool)
- res = result.flat
- for k, val in enumerate(self.flat):
- item = val.rstrip('\x00')
- res[k] = getattr(item, name)()
- return result
+ See also
+ --------
+ index
+ """
+ return index(self, sub, start, end)
def isalnum(self):
- return self._ismethod('isalnum')
+ """
+ Returns true for each element if all characters in the string
+ are alphanumeric and there is at least one character, false
+ otherwise.
+
+ See also
+ --------
+ isalnum
+ """
+ return isalnum(self)
def isalpha(self):
- return self._ismethod('isalpha')
+ """
+ Returns true for each element if all characters in the string
+ are alphabetic and there is at least one character, false
+ otherwise.
+
+ See also
+ --------
+ isalpha
+ """
+ return isalpha(self)
def isdigit(self):
- return self._ismethod('isdigit')
+ """
+ Returns true for each element if all characters in the string are
+ digits and there is at least one character, false otherwise.
+
+ See also
+ --------
+ isdigit
+ """
+ return isdigit(self)
def islower(self):
- return self._ismethod('islower')
+ """
+ Returns true for each element if all cased characters in the
+ string are lowercase and there is at least one cased character,
+ false otherwise.
+
+ See also
+ --------
+ islower
+ """
+ return islower(self)
def isspace(self):
- return self._ismethod('isspace')
+ """
+ Returns true for each element if there are only whitespace
+ characters in the string and there is at least one character,
+ false otherwise.
+
+ See also
+ --------
+ isspace
+ """
+ return isspace(self)
def istitle(self):
- return self._ismethod('istitle')
+ """
+ Returns true for each element if the element is a titlecased
+ string and there is at least one character, false otherwise.
+
+ See also
+ --------
+ istitle
+ """
+ return istitle(self)
def isupper(self):
- return self._ismethod('isupper')
+ """
+ Returns true for each element if all cased characters in the
+ string are uppercase and there is at least one character, false
+ otherwise.
- def join(self, seq):
- return self._generalmethod('join', broadcast(self, seq))
+ See also
+ --------
+ isupper
+ """
+ return isupper(self)
- def lower(self):
+ def join(self, seq):
"""
- Assure that every character of each array element is lower-case.
+ Return a string which is the concatenation of the strings in the
+ sequence `seq`.
- For each character possessing both "upper-case" and "lower-case" forms,
- if it is in upper-case, change it to lower; otherwise, leave it unchanged.
+ See also
+ --------
+ join
+ """
+ return join(self, seq)
- Parameters
- ----------
- None
+ if sys.version_info >= (2, 4):
+ def ljust(self, width, fillchar=' '):
+ """
+ Return an array with the elements of `self` left-justified in a
+ string of length `width`.
+
+ See also
+ --------
+ ljust
+ """
+ return asarray(ljust(self, width, fillchar))
+ else:
+ def ljust(self, width):
+ """
+ Return an array with the elements of `self` left-justified in a
+ string of length `width`.
- Returns
- -------
- ret : chararray
- `self` with all capital letters changed to lower-case.
+ See also
+ --------
+ ljust
+ """
+ return asarray(ljust(self, width))
- Examples
+ def lower(self):
+ """
+ Return an array with the elements of `self` converted to
+ lowercase.
+ See also
--------
- >>> c = np.array(['A1B C', '1BCA', 'BCA1']).view(np.chararray); c
- chararray(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
- >>> c.lower()
- chararray(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
-
+ lower
"""
- return self._samemethod('lower')
+ return asarray(lower(self))
- def lstrip(self, chars):
+ def lstrip(self, chars=None):
"""
- Remove leading characters from each element.
+ For each element in `self`, return a copy with the leading characters
+ removed.
- Returns a view of ``self`` with `chars` stripped from the start of
- each element. Note: **No Default** - `chars` must be specified (but if
- it is explicitly ``None`` or the empty string '', leading whitespace is
- removed).
+ See also
+ --------
+ lstrip
+ """
+ return asarray(lstrip(self, chars))
- Parameters
- ----------
- chars : string_like or None
- Character(s) to strip; whitespace stripped if `chars` == ``None``
- or `chars` == ''.
+ if sys.version_info >= (2, 5):
+ def partition(self, sep):
+ """
+ Partition each element in `self` around `sep`.
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ See also
+ --------
+ partition
+ """
+ return asarray(partition(self, sep))
- Raises
- ------
- TypeError: lstrip() takes exactly 2 arguments (1 given)
- If `chars` is not supplied.
+ def replace(self, old, new, count=None):
+ """
+ For each element in `self`, return a copy of the string with all
+ occurrences of substring `old` replaced by `new`.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.lstrip('a') # 'a' unstripped from c[1] because whitespace leading
- chararray(['AaAaA', ' aA', 'bBABba'],
- dtype='|S6')
- >>> c.lstrip('A') # leaves c unchanged
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> (c.lstrip(' ') == c.lstrip('')).all()
- True
- >>> (c.lstrip(' ') == c.lstrip(None)).all()
- True
-
+ replace
"""
- return self._generalmethod('lstrip', broadcast(self, chars))
+ return asarray(replace(self, old, new, count))
- def replace(self, old, new, count=None):
- return self._generalmethod('replace', broadcast(self, old, new, count))
+ def rfind(self, sub, start=0, end=None):
+ """
+ For each element in `self`, return the highest index in the string
+ where substring `sub` is found, such that `sub` is contained
+ within [`start`, `end`].
- def rfind(self, sub, start=None, end=None):
- return self._typedmethod('rfind', broadcast(self, sub, start, end), int)
+ See also
+ --------
+ rfind
+ """
+ return rfind(self, sub, start, end)
- def rindex(self, sub, start=None, end=None):
- return self._typedmethod('rindex', broadcast(self, sub, start, end), int)
+ def rindex(self, sub, start=0, end=None):
+ """
+ Like `rfind`, but raises `ValueError` when the substring `sub` is
+ not found.
- def rstrip(self, chars=None):
+ See also
+ --------
+ rindex
"""
- Remove trailing characters.
+ return rindex(self, sub, start, end)
- Returns a view of ``self`` with `chars` stripped from the end of each
- element.
+ if sys.version_info >= (2, 4):
+ def rjust(self, width, fillchar=' '):
+ """
+ Return an array with the elements of `self`
+ right-justified in a string of length `width`.
+
+ See also
+ --------
+ rjust
+ """
+ return asarray(rjust(self, width, fillchar))
+ else:
+ def rjust(self, width):
+ """
+ Return an array with the elements of `self`
+ right-justified in a string of length `width`.
+
+ See also
+ --------
+ rjust
+ """
+ return asarray(rjust(self, width))
+
+ if sys.version_info >= (2, 5):
+ def rpartition(self, sep):
+ """
+ Partition each element in `self` around `sep`.
+
+ See also
+ --------
+ rpartition
+ """
+ return asarray(rpartition(self, sep))
+
+ if sys.version_info >= (2, 4):
+ def rsplit(self, sep=None, maxsplit=None):
+ """
+ For each element in `self`, return a list of the words in
+ the string, using `sep` as the delimiter string.
- Parameters
- ----------
- chars : string_like, optional, default=None
- Character(s) to remove.
+ See also
+ --------
+ rsplit
+ """
+ return rsplit(self, sep, maxsplit)
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ def rstrip(self, chars=None):
+ """
+ For each element in `self`, return a copy with the trailing
+ characters removed.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7').view(np.chararray); c
- chararray(['aAaAaA', 'abBABba'],
- dtype='|S7')
- >>> c.rstrip('a')
- chararray(['aAaAaA', 'abBABb'],
- dtype='|S6')
- >>> c.rstrip('A')
- chararray(['aAaAa', 'abBABba'],
- dtype='|S7')
-
+ rstrip
"""
- return self._generalmethod('rstrip', broadcast(self, chars))
+ return asarray(rstrip(self, chars))
def split(self, sep=None, maxsplit=None):
- return self._typedmethod('split', broadcast(self, sep, maxsplit), object)
+ """
+ For each element in `self`, return a list of the words in the
+ string, using `sep` as the delimiter string.
- def splitlines(self, keepends=None):
- return self._typedmethod('splitlines', broadcast(self, keepends), object)
+ See also
+ --------
+ split
+ """
+ return split(self, sep, maxsplit)
- def startswith(self, prefix, start=None, end=None):
- return self._typedmethod('startswith', broadcast(self, prefix, start, end), bool)
+ def splitlines(self, keepends=None):
+ """
+ For each element in `self`, return a list of the lines in the
+ element, breaking at line boundaries.
- def strip(self, chars=None):
+ See also
+ --------
+ splitlines
"""
- Remove leading and trailing characters, whitespace by default.
+ return splitlines(self, keepends)
- Returns a view of ``self`` with `chars` stripped from the start and end of
- each element; by default leading and trailing whitespace is removed.
+ def startswith(self, prefix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `self` starts with `prefix`, otherwise `False`.
- Parameters
- ----------
- chars : string_like, optional, default=None
- Character(s) to strip; whitespace by default.
+ See also
+ --------
+ startswith
+ """
+ return startswith(self, prefix, start, end)
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ def strip(self, chars=None):
+ """
+ For each element in `self`, return a copy with the leading and
+ trailing characters removed.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.strip()
- chararray(['aAaAaA', 'aA', 'abBABba'],
- dtype='|S7')
- >>> c.strip('a') # 'a' unstripped from c[1] because whitespace leads
- chararray(['AaAaA', ' aA', 'bBABb'],
- dtype='|S6')
- >>> c.strip('A') # 'A' unstripped from c[1] because (unprinted) ws trails
- chararray(['aAaAa', ' aA', 'abBABba'],
- dtype='|S7')
-
+ strip
"""
- return self._generalmethod('strip', broadcast(self, chars))
+ return asarray(strip(self, chars))
def swapcase(self):
"""
- Switch upper-case letters to lower-case, and vice-versa.
+ For each element in `self`, return a copy of the string with
+ uppercase characters converted to lowercase and vice versa.
- Parameters
- ----------
- None
+ See also
+ --------
+ swapcase
+ """
+ return asarray(swapcase(self))
- Returns
- -------
- ret : chararray
- `self` with all lower-case letters capitalized and all upper-case
- changed to lower case.
+ def title(self):
+ """
+ For each element in `self`, return a titlecased version of the
+ string: words start with uppercase characters, all remaining cased
+ characters are lowercase.
- Examples
+ See also
--------
- >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5').view(np.chararray);c
- chararray(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
- dtype='|S5')
- >>> c.swapcase()
- chararray(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
- dtype='|S5')
+ title
+ """
+ return asarray(title(self))
+ def translate(self, table, deletechars=None):
"""
- return self._samemethod('swapcase')
+ For each element in `self`, return a copy of the string where
+ all characters occurring in the optional argument
+ `deletechars` are removed, and the remaining characters have
+ been mapped through the given translation table.
- def title(self):
+ See also
+ --------
+ translate
"""
- Capitalize the first character of each array element.
+ return asarray(translate(self, table, deletechars))
- For each element of `self`, if the first character is a letter
- possessing both "upper-case" and "lower-case" forms, and it is
- presently in lower-case, change it to upper-case; otherwise, leave
- it untouched.
+ def upper(self):
+ """
+ Return an array with the elements of `self` converted to
+ uppercase.
- Parameters
- ----------
- None
+ See also
+ --------
+ upper
+ """
+ return asarray(upper(self))
- Returns
- -------
- ret : chararray
- `self` with
+ def zfill(self, width):
+ """
+ Return the numeric string left-filled with zeros in a string of
+ length `width`.
- Examples
+ See also
--------
- >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5').view(np.chararray);c
- chararray(['a1b c', '1b ca', 'b ca1', 'ca1b'],
- dtype='|S5')
- >>> c.title()
- chararray(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
- dtype='|S5')
+ zfill
+ """
+ return asarray(zfill(self, width))
+ def isnumeric(self):
"""
- return self._samemethod('title')
+ For each element in `self`, return True if there are only
+ numeric characters in the element.
- def translate(self, table, deletechars=None):
- if self.dtype is unicode_:
- return self._generalmethod('translate', broadcast(self, table))
- else:
- return self._generalmethod('translate', broadcast(self, table, deletechars))
+ See also
+ --------
+ isnumeric
+ """
+ return isnumeric(self)
- def upper(self):
+ def isdecimal(self):
"""
- Capitalize every character of each array element.
+ For each element in `self`, return True if there are only
+ decimal characters in the element.
- For each character possessing both "upper-case" and "lower-case" forms,
- if it is in lower-case, change it to upper; otherwise, leave it unchanged.
+ See also
+ --------
+ isdecimal
+ """
+ return isdecimal(self)
- Parameters
- ----------
- None
- Returns
- -------
- ret : chararray
- `self` with all characters capitalized.
+def array(obj, itemsize=None, copy=True, unicode=None, order=None):
+ """
+ Create a `chararray`.
- Examples
- --------
- >>> c = np.array(['a1b c', '1bca', 'bca1']).view(np.chararray); c
- chararray(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
- >>> c.upper()
- chararray(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
+ .. note::
+ The `chararray` class is provided for numarray backward-compatibility.
+ New code (not concerned with numarray compatibility) should use
+ arrays of type object_, string_ or unicode_ and use the free
+ functions in :mod:`numpy.char <numpy.core.defchararray>` for
+ fast vectorized string operations instead.
- """
- return self._samemethod('upper')
+ Versus a regular Numpy array of type `string_` or `unicode_`, a
+ `chararray` adds the following functionality:
- def zfill(self, width):
- return self._generalmethod('zfill', broadcast(self, width))
+ 1) values automatically have whitespace removed from the end
+ when indexed
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
-def array(obj, itemsize=None, copy=True, unicode=False, order=None):
+ 3) vectorized string operations are provided as methods
+ (e.g. `str.endswith`) and infix operators (e.g. +, *, %)
+
+ Parameters
+ ----------
+ obj : array of str or unicode-like
+
+ itemsize : int, optional
+ `itemsize` is the number of characters per scalar in the
+ resulting array. If `itemsize` is None, and `obj` is an
+ object array or a Python list, the `itemsize` will be
+ automatically determined. If `itemsize` is provided and `obj`
+ is of type `str` or `unicode`, then the `obj` string will be
+ chunked into pieces of `itemsize` characters each.
+
+ copy : bool, optional
+ If true (default), then the object is copied. Otherwise, a copy
+ will only be made if __array__ returns a copy, if obj is a
+ nested sequence, or if a copy is needed to satisfy any of the other
+ requirements (`itemsize`, `unicode`, `order`, etc.).
+
+ unicode : bool, optional
+ When true, the resulting `chararray` can contain Unicode
+ characters, when false only 8-bit characters. If `unicode` is
+ `None` and `obj` is one of the following:
+
+ - a `chararray`,
+ - an ndarray of type `string_` or `unicode_`,
+ - a Python str or unicode object,
+
+ then the unicode setting of the output array will be
+ automatically determined.
+
+ order : {'C', 'F', 'A'}, optional
+ Specify the order of the array. If order is 'C' (default), then the
+ array will be in C-contiguous order (last-index varies the
+ fastest). If order is 'F', then the returned array
+ will be in Fortran-contiguous order (first-index varies the
+ fastest). If order is 'A', then the returned array may
+ be in any order (either C-, Fortran-contiguous, or even
+ discontiguous).
+ """
+ if isinstance(obj, (str, _unicode)):
+ if unicode is None:
+ if isinstance(obj, _unicode):
+ unicode = True
+ else:
+ unicode = False
+ if itemsize is None:
+ itemsize = _len(obj)
+ shape = _len(obj) / itemsize
+ return chararray(shape, itemsize=itemsize, unicode=unicode,
+ buffer=obj, order=order)
+
+ if isinstance(obj, (list, tuple)):
+ obj = numpy.asarray(obj)
+
+ if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
+ # If we just have a plain ndarray (not already a chararray),
+ # create a chararray view around it.
+ if not isinstance(obj, chararray):
+ obj = obj.view(chararray)
- if isinstance(obj, chararray):
if itemsize is None:
itemsize = obj.itemsize
- if copy or (itemsize != obj.itemsize) \
- or (not unicode and obj.dtype == unicode_) \
- or (unicode and obj.dtype == string_):
- return obj.astype("%s%d" % (obj.dtype.char, itemsize))
+ # itemsize is in 8-bit chars, so for Unicode, we need
+ # to divide by the size of a single Unicode character,
+ # which for Numpy is always 4
+ if issubclass(obj.dtype.type, unicode_):
+ itemsize /= 4
+
+ if unicode is None:
+ if issubclass(obj.dtype.type, unicode_):
+ unicode = True
+ else:
+ unicode = False
+
+ if unicode:
+ dtype = unicode_
else:
- return obj
+ dtype = string_
- if isinstance(obj, ndarray) and (obj.dtype in [unicode_, string_]):
- new = obj.view(chararray)
- if unicode and obj.dtype == string_:
- return new.astype((unicode_, obj.itemsize))
- elif obj.dtype == unicode_:
- return new.astype((string_, obj.itemsize))
+ if order is not None:
+ obj = numpy.asarray(obj, order=order)
+ if (copy
+ or (itemsize != obj.itemsize)
+ or (not unicode and isinstance(obj, unicode_))
+ or (unicode and isinstance(obj, string_))):
+ obj = obj.astype((dtype, long(itemsize)))
+ return obj
- if copy: return new.copy()
- else: return new
+ if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
+ if itemsize is None:
+ # Since no itemsize was specified, convert the input array to
+ # a list so the ndarray constructor will automatically
+ # determine the itemsize for us.
+ obj = obj.tolist()
+ # Fall through to the default case
+
+ if unicode:
+ dtype = unicode_
+ else:
+ dtype = string_
- if unicode: dtype = "U"
- else: dtype = "S"
+ if itemsize is None:
+ val = narray(obj, dtype=dtype, order=order, subok=True)
+ else:
+ val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
+ return val.view(chararray)
- if itemsize is not None:
- dtype += str(itemsize)
- if isinstance(obj, (str, _unicode)):
- if itemsize is None:
- itemsize = len(obj)
- shape = len(obj) / itemsize
- return chararray(shape, itemsize=itemsize, unicode=unicode,
- buffer=obj)
+def asarray(obj, itemsize=None, unicode=None, order=None):
+ """
+ Convert the input to a `chararray`, copying the data only if
+ necessary.
- # default
- val = narray(obj, dtype=dtype, order=order, subok=1)
+ Versus a regular Numpy array of type `string_` or `unicode_`, a
+ `chararray` adds the following functionality:
- return val.view(chararray)
+ 1) values automatically have whitespace removed from the end
+ when indexed
+
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
+
+ 3) vectorized string operations are provided as methods
+ (e.g. `str.endswith`) and infix operators (e.g. +, *, %)
+
+ Parameters
+ ----------
+ obj : array of str or unicode-like
-def asarray(obj, itemsize=None, unicode=False, order=None):
+ itemsize : int, optional
+ `itemsize` is the number of characters per scalar in the
+ resulting array. If `itemsize` is None, and `obj` is an
+ object array or a Python list, the `itemsize` will be
+ automatically determined. If `itemsize` is provided and `obj`
+ is of type `str` or `unicode`, then the `obj` string will be
+ chunked into pieces of `itemsize` characters each.
+
+ unicode : bool, optional
+ When true, the resulting `chararray` can contain Unicode
+ characters, when false only 8-bit characters. If `unicode` is
+ `None` and `obj` is one of the following:
+
+ - a `chararray`,
+ - an ndarray of type `string_` or `unicode_`,
+ - a Python str or unicode object,
+
+ then the unicode setting of the output array will be
+ automatically determined.
+
+ order : {'C', 'F'}, optional
+ Specify the order of the array. If order is 'C' (default), then the
+ array will be in C-contiguous order (last-index varies the
+ fastest). If order is 'F', then the returned array
+ will be in Fortran-contiguous order (first-index varies the
+ fastest).
+ """
return array(obj, itemsize, copy=False,
unicode=unicode, order=order)