summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authormdroe <mdroe@localhost>2009-10-12 15:24:43 +0000
committermdroe <mdroe@localhost>2009-10-12 15:24:43 +0000
commit7deb4d5db4282e1e1c860dda268a21c6979e9725 (patch)
tree6dbd07a1dfcec326ce90228c17cbdd337f4f1621
parent66c62a8aeed8c963e2ef108685d5681e1e445f3d (diff)
downloadnumpy-7deb4d5db4282e1e1c860dda268a21c6979e9725.tar.gz
Many improvements to chararray. Fixes Trac bugs 1199, 856, 855, 1231, 1235, 1240, 1241. docstrings, full unit-test coverage, C-based vectorized string operations.
-rw-r--r--doc/source/reference/arrays.classes.rst11
-rw-r--r--doc/source/reference/routines.array-creation.rst1
-rw-r--r--doc/source/reference/routines.char.rst88
-rw-r--r--doc/source/reference/routines.rst1
-rw-r--r--numpy/core/defchararray.py2802
-rw-r--r--numpy/core/src/multiarray/arraytypes.c.src20
-rw-r--r--numpy/core/src/multiarray/common.c6
-rw-r--r--numpy/core/src/multiarray/multiarraymodule.c228
-rw-r--r--numpy/core/tests/test_defchararray.py534
-rw-r--r--numpy/core/tests/test_regression.py32
10 files changed, 3141 insertions, 582 deletions
diff --git a/doc/source/reference/arrays.classes.rst b/doc/source/reference/arrays.classes.rst
index 9e94d5c4d..6d5e7bde0 100644
--- a/doc/source/reference/arrays.classes.rst
+++ b/doc/source/reference/arrays.classes.rst
@@ -228,11 +228,12 @@ Character arrays (:mod:`numpy.char`)
single: character arrays
.. note::
- The chararray module exists for backwards compatibility with Numarray,
- it is not recommended for new development. If one needs arrays of
- strings, use arrays of `dtype` object.
+ The chararray module exists for backwards compatibility with
+ Numarray; it is not recommended for new development. If one needs
+ arrays of strings, use arrays of `dtype` `object_`, `string_` or
+ `unicode_`.
-These are enhanced arrays of either :class:`string` type or
+These are enhanced arrays of either :class:`string_` type or
:class:`unicode_` type. These arrays inherit from the
:class:`ndarray`, but specially-define the operations ``+``, ``*``,
and ``%`` on a (broadcasting) element-by-element basis. These
@@ -244,7 +245,7 @@ to create a chararray is to use :meth:`self.view(chararray)
<ndarray.view>` where *self* is an ndarray of string or unicode
data-type. However, a chararray can also be created using the
:meth:`numpy.chararray` constructor, or via the
-:func:`numpy.char.array` function:
+:func:`numpy.char.array <core.defchararray.array>` function:
.. autosummary::
:toctree: generated/
diff --git a/doc/source/reference/routines.array-creation.rst b/doc/source/reference/routines.array-creation.rst
index 4eabb8ec7..ef9c0041d 100644
--- a/doc/source/reference/routines.array-creation.rst
+++ b/doc/source/reference/routines.array-creation.rst
@@ -66,6 +66,7 @@ Creating character arrays (:mod:`numpy.char`)
:toctree: generated/
core.defchararray.array
+ core.defchararray.asarray
Numerical ranges
----------------
diff --git a/doc/source/reference/routines.char.rst b/doc/source/reference/routines.char.rst
new file mode 100644
index 000000000..2e995a772
--- /dev/null
+++ b/doc/source/reference/routines.char.rst
@@ -0,0 +1,88 @@
+String operations
+*****************
+
+.. currentmodule:: numpy.core.defchararray
+
+This module provides a set of vectorized string operations for arrays
+of type `numpy.string_` or `numpy.unicode_`. All of them are based on
+the string methods in the Python standard library.
+
+String operations
+-----------------
+
+.. autosummary::
+ :toctree: generated/
+
+ add
+ multiply
+ mod
+ capitalize
+ center
+ decode
+ encode
+ join
+ ljust
+ lower
+ lstrip
+ partition
+ replace
+ rjust
+ rpartition
+ rsplit
+ rstrip
+ split
+ splitlines
+ strip
+ swapcase
+ title
+ translate
+ upper
+ zfill
+
+Comparison
+----------
+
+Unlike the standard numpy comparison operators, the ones in the `char`
+module strip trailing whitespace characters before performing the
+comparison.
+
+.. autosummary::
+ :toctree: generated/
+
+ equal
+ not_equal
+ greater_equal
+ less_equal
+ greater
+ less
+
+String information
+------------------
+
+.. autosummary::
+ :toctree: generated/
+
+ count
+ len
+ find
+ index
+ isalpha
+ isdecimal
+ isdigit
+ islower
+ isnumeric
+ isspace
+ istitle
+ isupper
+ rfind
+ rindex
+ startswith
+
+Convenience class
+-----------------
+
+.. autosummary::
+ :toctree: generated/
+
+ chararray
+
diff --git a/doc/source/reference/routines.rst b/doc/source/reference/routines.rst
index eb2e9fc4e..90fe1c189 100644
--- a/doc/source/reference/routines.rst
+++ b/doc/source/reference/routines.rst
@@ -34,3 +34,4 @@ Routines
routines.numarray
routines.oldnumeric
routines.ctypeslib
+ routines.char
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 580bde59a..b59f27065 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -1,75 +1,1672 @@
"""
-Module for character arrays.
+This module contains a set of functions for vectorized string
+operations and methods.
.. note::
- The chararray module exists for backwards compatibility with Numarray,
- it is not recommended for new development. If one needs arrays of
- strings, use arrays of `dtype` object.
+ The chararray module exists for backwards compatibility with
+ Numarray; it is not recommended for new development. If one needs
+ arrays of strings, use arrays of `dtype` `object_`, `string_` or
+ `unicode_`.
-The preferred alias for `defchararray` is `numpy.char`.
+Methods will only be available if the corresponding str method is
+available in your version of Python.
+The preferred alias for `defchararray` is `numpy.char`.
"""
+
import sys
-from numerictypes import string_, unicode_, integer, object_
-from numeric import ndarray, broadcast, empty, compare_chararrays
+from numerictypes import string_, unicode_, integer, object_, bool_, character
+from numeric import ndarray, compare_chararrays
from numeric import array as narray
+from numpy.core.multiarray import _vec_string
+import numpy
__all__ = ['chararray']
_globalvar = 0
_unicode = unicode
+_len = len
+
+def _use_unicode(*args):
+ """
+ Helper function for determining the output type of some string
+ operations.
+
+ For an operation on two ndarrays, if at least one is unicode, the
+ result should be unicode.
+ """
+ for x in args:
+ if (isinstance(x, _unicode)
+ or issubclass(numpy.asarray(x).dtype.type, unicode_)):
+ return unicode_
+ return string_
+
+def _to_string_or_unicode_array(result):
+ """
+ Helper function to cast a result back into a string or unicode array
+ if an object array must be used as an intermediary.
+ """
+ return numpy.asarray(result.tolist())
+
+def _clean_args(*args):
+ """
+ Helper function for delegating arguments to Python string
+ functions.
+
+ Many of the Python string operations that have optional arguments
+ do not use 'None' to indicate a default value. In these cases,
+ we need to remove all `None` arguments, and those following them.
+ """
+ newargs = []
+ for chk in args:
+ if chk is None:
+ break
+ newargs.append(chk)
+ return newargs
+
+def _get_num_chars(a):
+ """
+ Helper function that returns the number of characters per field in
+ a string or unicode array. This is to abstract out the fact that
+ for a unicode array this is itemsize / 4.
+ """
+ if issubclass(a.dtype.type, unicode_):
+ return a.itemsize / 4
+ return a.itemsize
+
+
+def equal(x1, x2):
+ """
+ Return (x1 == x2) element-wise.
+
+ Unlike `numpy.equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ not_equal, greater_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '==', True)
+
+def not_equal(x1, x2):
+ """
+ Return (x1 != x2) element-wise.
+
+ Unlike `numpy.not_equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, greater_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '!=', True)
+
+def greater_equal(x1, x2):
+ """
+ Return (x1 >= x2) element-wise.
+
+ Unlike `numpy.greater_equal`, this comparison is performed by
+ first stripping whitespace characters from the end of the string.
+ This behavior is provided for backward-compatibility with
+ numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, less_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '>=', True)
+
+def less_equal(x1, x2):
+ """
+ Return (x1 <= x2) element-wise.
+
+ Unlike `numpy.less_equal`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, greater, less
+ """
+ return compare_chararrays(x1, x2, '<=', True)
+
+def greater(x1, x2):
+ """
+ Return (x1 > x2) element-wise.
+
+ Unlike `numpy.greater`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, less_equal, less
+ """
+ return compare_chararrays(x1, x2, '>', True)
+
+def less(x1, x2):
+ """
+ Return (x1 < x2) element-wise.
+
+ Unlike `numpy.less`, this comparison is performed by first
+ stripping whitespace characters from the end of the string. This
+ behavior is provided for backward-compatibility with numarray.
+
+ Parameters
+ ----------
+ x1, x2 : array_like of string_ or unicode_
+ Input arrays of the same shape.
+
+ Returns
+ -------
+ out : {ndarray, bool}
+ Output array of bools, or a single bool if x1 and x2 are scalars.
+
+ See Also
+ --------
+ equal, not_equal, greater_equal, less_equal, greater
+ """
+ return compare_chararrays(x1, x2, '<', True)
+
+def str_len(a):
+ """
+ Return len(a) element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers
+
+ See also
+ --------
+ __builtin__.len
+ """
+ return _vec_string(a, integer, '__len__')
+
+def add(x1, x2):
+ """
+ Return (x1 + x2), that is string concatenation, element-wise for a
+ pair of array_likes of string_ or unicode_.
+
+ Parameters
+ ----------
+ x1 : array_like of string_ or unicode_
+ x2 : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+ """
+ arr1 = numpy.asarray(x1)
+ arr2 = numpy.asarray(x2)
+ out_size = _get_num_chars(arr1) + _get_num_chars(arr2)
+ dtype = _use_unicode(arr1, arr2)
+ return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,))
+
+def multiply(a, i):
+ """
+ Return (a * i), that is string multiple concatenation,
+ element-wise.
+
+ Values in `i` of less than 0 are treated as 0 (which yields an
+ empty string).
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ i : array_like of integers
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input types
+ """
+ a_arr = numpy.asarray(a)
+ i_arr = numpy.asarray(i)
+ if not issubclass(i_arr.dtype.type, integer):
+ raise ValueError, "Can only multiply by integers"
+ out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0)
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,))
+
+def mod(a, values):
+ """
+ Return (a % values), that is pre-Python 2.6 string formatting
+ (interpolation), element-wise for a pair of array_likes of string_
+ or unicode_.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ values : array_like of values
+ These values will be element-wise interpolated into the string.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.__mod__
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, '__mod__', (values,)))
+
+def capitalize(a):
+ """
+ Return a copy of `a` with only the first character of each element
+ capitalized.
-# special sub-class for character arrays (string_ and unicode_)
-# This adds + and * operations and methods of str and unicode types
-# which operate on an element-by-element basis
+ Calls `str.capitalize` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.capitalize
+
+ Examples
+ --------
+ >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c
+ array(['a1b2', '1b2a', 'b2a1', '2a1b'],
+ dtype='|S4')
+ >>> np.char.capitalize(c)
+ array(['A1b2', '1b2a', 'B2a1', '2a1b'],
+ dtype='|S4')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'capitalize')
+
+if sys.version_info >= (2, 4):
+ def center(a, width, fillchar=' '):
+ """
+ Return a copy of `a` with its elements centered in a string of
+ length `width`.
+
+ Calls `str.center` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+ fillchar : str or unicode, optional
+ The padding character to use (default is space).
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.center
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar))
+else:
+ def center(a, width):
+ """
+ Return an array with the elements of `a` centered in a string
+ of length width.
+
+ Calls `str.center` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.center
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'center', (width_arr,))
+
+def count(a, sub, start=0, end=None):
+ """
+ Returns an array with the number of non-overlapping occurrences of
+ substring `sub` in the range [`start`, `end`].
+
+ Calls `str.count` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ The substring to search for
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as slice
+ notation to specify the range in which to count.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers.
+
+ See also
+ --------
+ str.count
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.count(c, 'A')
+ array([3, 1, 1])
+ >>> np.char.count(c, 'aA')
+ array([3, 1, 0])
+ >>> np.char.count(c, 'A', start=1, end=4)
+ array([2, 1, 1])
+ >>> np.char.count(c, 'A', start=1, end=3)
+ array([1, 0, 0])
+ """
+ return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end))
+
+def decode(a, encoding=None, errors=None):
+ """
+ Calls `str.decode` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ encoding : str, optional
+ The name of an encoding
+ errors : str, optional
+ Specifies how to handle encoding errors
+
+ Returns
+ -------
+ out : ndarray
+
+ Notes
+ -----
+ The type of the result will depend on the encoding specified.
+
+ See also
+ --------
+ str.decode
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.encode(c, encoding='cp037')
+ array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
+ '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
+ dtype='|S7')
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'decode', _clean_args(encoding, errors)))
+
+def encode(a, encoding=None, errors=None):
+ """
+ Calls `str.encode` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ encoding : str, optional
+ The name of an encoding
+ errors : str, optional
+ Specifies how to handle encoding errors
+
+ Returns
+ -------
+ out : ndarray
+
+ Notes
+ -----
+ The type of the result will depend on the encoding specified.
+
+ See also
+ --------
+ str.encode
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'encode', _clean_args(encoding, errors)))
+
+def endswith(a, suffix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `a` ends with `suffix`, otherwise `False`.
+
+ Calls `str.endswith` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ suffix : str
+ start, end : int, optional
+ With optional `start`, test beginning at that position. With
+ optional `end`, stop comparing at that position.
+
+ Returns
+ -------
+ out : ndarray
+ Outputs an array of booleans.
+
+ See also
+ --------
+ str.endswith
+
+ Examples
+ --------
+ >>> s = np.array(['foo', 'bar'])
+ >>> s[0] = 'foo'
+ >>> s[1] = 'bar'
+ >>> s
+ array(['foo', 'bar'],
+ dtype='|S3')
+ >>> np.char.endswith(s, 'ar')
+ array([False, True], dtype=bool)
+ >>> np.char.endswith(s, 'a', start=1, end=2)
+ array([False, True], dtype=bool)
+ """
+ return _vec_string(
+ a, bool_, 'endswith', [suffix, start] + _clean_args(end))
+
+def expandtabs(a, tabsize=8):
+ """
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces.
+
+ Calls `str.expandtabs` element-wise.
+
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces, depending on the current column
+ and the given `tabsize`. The column number is reset to zero after
+ each newline occurring in the string. If `tabsize` is not given, a
+ tab size of 8 characters is assumed. This doesn't understand other
+ non-printing characters or escape sequences.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ tabsize : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.expandtabs
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'expandtabs', (tabsize,)))
+
+def find(a, sub, start=0, end=None):
+ """
+ For each element, return the lowest index in the string where
+ substring `sub` is found.
+
+ Calls `str.find` element-wise.
+
+ For each element, return the lowest index in the string where
+ substring `sub` is found, such that `sub` is contained in the
+ range [`start`, `end`].
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as in
+ slice notation.
+
+ Returns
+ -------
+ out : {ndarray, integer}
+ Output array of integers. Returns -1 if `sub` is not found.
+
+ See also
+ --------
+ str.find
+ """
+ return _vec_string(
+ a, integer, 'find', [sub, start] + _clean_args(end))
+
+if sys.version_info >= (2.6):
+ def format(a, *args, **kwargs):
+ # _vec_string doesn't support kwargs at present
+ raise NotImplementedError
+
+def index(a, sub, start=0, end=None):
+ """
+ Like `find`, but raises `ValueError` when the substring is not found.
+
+ Calls `str.index` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sub : str or unicode
+ start, end : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers.
+
+ See also
+ --------
+ find, str.index
+ """
+ return _vec_string(
+ a, integer, 'index', [sub, start] + _clean_args(end))
+
+def isalnum(a):
+ """
+ Returns true for each element if all characters in the string are
+ alphanumeric and there is at least one character, false otherwise.
+
+ Calls `str.isalnum` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isalnum
+ """
+ return _vec_string(a, bool_, 'isalnum')
+
+def isalpha(a):
+ """
+ Returns true for each element if all characters in the string are
+ alphabetic and there is at least one character, false otherwise.
+
+ Calls `str.isalpha` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isalpha
+ """
+ return _vec_string(a, bool_, 'isalpha')
+
+def isdigit(a):
+ """
+ Returns true for each element if all characters in the string are
+ digits and there is at least one character, false otherwise.
+
+ Calls `str.isdigit` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isdigit
+ """
+ return _vec_string(a, bool_, 'isdigit')
+
+def islower(a):
+ """
+ Returns true for each element if all cased characters in the
+ string are lowercase and there is at least one cased character,
+ false otherwise.
+
+ Calls `str.islower` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.islower
+ """
+ return _vec_string(a, bool_, 'islower')
+
+def isspace(a):
+ """
+ Returns true for each element if there are only whitespace
+ characters in the string and there is at least one character,
+ false otherwise.
+
+ Calls `str.isspace` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isspace
+ """
+ return _vec_string(a, bool_, 'isspace')
+
+def istitle(a):
+ """
+ Returns true for each element if the element is a titlecased
+ string and there is at least one character, false otherwise.
+
+ Calls `str.istitle` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.istitle
+ """
+ return _vec_string(a, bool_, 'istitle')
+
+def isupper(a):
+ """
+ Returns true for each element if all cased characters in the
+ string are uppercase and there is at least one cased character,
+ false otherwise.
+
+ Calls `str.isupper` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of booleans
+
+ See also
+ --------
+ str.isupper
+ """
+ return _vec_string(a, bool_, 'isupper')
+
+def join(sep, seq):
+ """
+ Return a string which is the concatenation of the strings in the
+ sequence `seq`.
+
+ Calls `str.join` element-wise.
+
+ Parameters
+ ----------
+ sep : array_like of string_ or unicode_
+ seq : array_like of string_ or unicode_
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string_ or unicode_, depending on input types
+
+ See also
+ --------
+ str.join
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(sep, object_, 'join', (seq,)))
+
+if sys.version_info >= (2, 4):
+ def ljust(a, width, fillchar=' '):
+ """
+ Return an array with the elements of `a` left-justified in a
+ string of length `width`.
+
+ Calls `str.ljust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+ fillchar : str or unicode, optional
+ The character to use for padding
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.ljust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar))
+else:
+ def ljust(a, width):
+ """
+ Return an array with the elements of `a` left-justified in a
+ string of length `width`.
+
+ Calls `str.ljust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.ljust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr,))
+
+def lower(a):
+ """
+ Return an array with the elements of `a` converted to lowercase.
+
+ Calls `str.lower` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.lower
+
+ Examples
+ --------
+ >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c
+ array(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+ >>> np.char.lower(c)
+ array(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'lower')
+
+def lstrip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the leading characters
+ removed.
+
+ Calls `str.lstrip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a prefix; rather, all combinations of its values are
+ stripped.
+
+ Returns
+ -------
+ out : ndarray, str or unicode
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.lstrip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.lstrip(c, 'a') # 'a' is not stripped from c[1] because of its leading whitespace
+ array(['AaAaA', ' aA', 'bBABba'],
+ dtype='|S6')
+ >>> np.char.lstrip(c, 'A') # leaves c unchanged
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all()
+ True
+ >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all()
+ True
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,))
+
+if sys.version_info >= (2, 5):
+ def partition(a, sep):
+ """
+ Partition each element in `a` around `sep`.
+
+ Calls `str.partition` element-wise.
+
+ For each element in `a`, split the element at the first
+ occurrence of `sep`, and return 3 strings containing the part
+ before the separator, the separator itself, and the part after
+ the separator. If the separator is not found, return 3 strings
+ containing the string itself, followed by two empty strings.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sep : str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string or unicode, depending on input
+ type. The output array will have an extra dimension with
+ 3 elements per input element.
+
+ See also
+ --------
+ str.partition
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'partition', (sep,)))
+
+def replace(a, old, new, count=None):
+ """
+ For each element in `a`, return a copy of the string with all
+ occurrences of substring `old` replaced by `new`.
+
+ Calls `str.replace` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ old, new : str or unicode
+ count : int, optional
+ If the optional argument `count` is given, only the first
+ `count` occurrences are replaced.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.replace
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(
+ a, object_, 'replace', [old, new] +_clean_args(count)))
+
+def rfind(a, sub, start=0, end=None):
+ """
+ For each element in `a`, return the highest index in the string
+ where substring `sub` is found, such that `sub` is contained
+ within [`start`, `end`].
+
+ Calls `str.rfind` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sub : str or unicode
+ start, end : int, optional
+ Optional arguments `start` and `end` are interpreted as in
+ slice notation.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers. Return -1 on failure.
+
+ See also
+ --------
+ str.rfind
+ """
+ return _vec_string(
+ a, integer, 'rfind', [sub, start] + _clean_args(end))
+
+def rindex(a, sub, start=0, end=None):
+ """
+ Like `rfind`, but raises `ValueError` when the substring `sub` is
+ not found.
+
+ Calls `str.rindex` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sub : str or unicode
+ start, end : int, optional
+
+ Returns
+ -------
+ out : ndarray
+ Output array of integers.
+
+ See also
+ --------
+ rfind, str.rindex
+ """
+ return _vec_string(
+ a, integer, 'rindex', [sub, start] + _clean_args(end))
+
+if sys.version_info >= (2, 4):
+ def rjust(a, width, fillchar=' '):
+ """
+ Return an array with the elements of `a` right-justified in a
+ string of length `width`.
+
+ Calls `str.rjust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+ fillchar : str or unicode, optional
+ The character to use for padding
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.rjust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar))
+else:
+ def rjust(a, width):
+ """
+ Return an array with the elements of `a` right-justified in a
+ string of length `width`.
+
+ Calls `str.rjust` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ width : int
+ The length of the resulting strings
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.rjust
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'rjust', (width,))
+
+if sys.version_info >= (2, 5):
+ def rpartition(a, sep):
+ """
+ Partition each element in `a` around `sep`.
+
+ Calls `str.rpartition` element-wise.
+
+ For each element in `a`, split the element at the last
+ occurrence of `sep`, and return 3 strings containing the part
+ before the separator, the separator itself, and the part after
+ the separator. If the separator is not found, return 3 strings
+ containing two empty strings, followed by the string itself.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ sep : str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of string or unicode, depending on input
+ type. The output array will have an extra dimension with
+ 3 elements per input element.
+
+ See also
+ --------
+ str.rpartition
+ """
+ return _to_string_or_unicode_array(
+ _vec_string(a, object_, 'rpartition', (sep,)))
+
+if sys.version_info >= (2, 4):
+ def rsplit(a, sep=None, maxsplit=None):
+ """
+ For each element in `a`, return a list of the words in the
+ string, using `sep` as the delimiter string.
+
+ Calls `str.rsplit` element-wise.
+
+ Except for splitting from the right, `rsplit`
+ behaves like `split`.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sep : str or unicode, optional
+ If `sep` is not specified or `None`, any whitespace string
+ is a separator.
+ maxsplit : int, optional
+ If `maxsplit` is given, at most `maxsplit` splits are done,
+ the rightmost ones.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.rsplit, split
+ """
+ # This will return an array of lists of different sizes, so we
+ # leave it as an object array
+ return _vec_string(
+ a, object_, 'rsplit', [sep] + _clean_args(maxsplit))
+
+def rstrip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the trailing
+ characters removed.
+
+ Calls `str.rstrip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a suffix; rather, all combinations of its values are
+ stripped.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.rstrip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c
+ array(['aAaAaA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.rstrip(c, 'a')
+ array(['aAaAaA', 'abBABb'],
+ dtype='|S7')
+ >>> np.char.rstrip(c, 'A')
+ array(['aAaAa', 'abBABba'],
+ dtype='|S7')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,))
+
+def split(a, sep=None, maxsplit=None):
+ """
+ For each element in `a`, return a list of the words in the
+ string, using `sep` as the delimiter string.
+
+ Calls `str.split` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ sep : str or unicode, optional
+ If `sep` is not specified or `None`, any whitespace string is a
+ separator.
+ maxsplit : int, optional
+ If `maxsplit` is given, at most `maxsplit` splits are done.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.split, rsplit
+ """
+ # This will return an array of lists of different sizes, so we
+ # leave it as an object array
+ return _vec_string(
+ a, object_, 'split', [sep] + _clean_args(maxsplit))
+
+def splitlines(a, keepends=None):
+ """
+ For each element in `a`, return a list of the lines in the
+ element, breaking at line boundaries.
+
+ Calls `str.splitlines` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string_ or unicode_
+ keepends : bool, optional
+ Line breaks are not included in the resulting list unless
+ keepends is given and true.
+
+ Returns
+ -------
+ out : ndarray
+ Array of list objects
+
+ See also
+ --------
+ str.splitlines
+ """
+ return _vec_string(
+ a, object_, 'splitlines', _clean_args(keepends))
+
+def startswith(a, prefix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `a` starts with `prefix`, otherwise `False`.
+
+ Calls `str.startswith` element-wise.
+
+ Parameters
+ ----------
+ a : array_like of string or unicode
+ prefix : str
+ start, end : int, optional
+ With optional `start`, test beginning at that position. With
+ optional `end`, stop comparing at that position.
+
+ Returns
+ -------
+ out : ndarray
+ Array of booleans
+
+ See also
+ --------
+ str.startswith
+ """
+ return _vec_string(
+ a, bool_, 'startswith', [prefix, start] + _clean_args(end))
+
+def strip(a, chars=None):
+ """
+ For each element in `a`, return a copy with the leading and
+ trailing characters removed.
+
+ Calls `str.strip` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ chars : str or unicode, optional
+ The `chars` argument is a string specifying the set of
+ characters to be removed. If omitted or None, the `chars`
+ argument defaults to removing whitespace. The `chars` argument
+ is not a prefix or suffix; rather, all combinations of its
+ values are stripped.
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.strip
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba'])
+ >>> c
+ array(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.strip(c)
+ array(['aAaAaA', 'aA', 'abBABba'],
+ dtype='|S7')
+ >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads
+ array(['AaAaA', ' aA', 'bBABb'],
+ dtype='|S7')
+ >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails
+ array(['aAaAa', ' aA', 'abBABba'],
+ dtype='|S7')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars))
+
+def swapcase(a):
+ """
+ For each element in `a`, return a copy of the string with
+ uppercase characters converted to lowercase and vice versa.
+
+ Calls `str.swapcase` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.swapcase
+
+ Examples
+ --------
+ >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c
+ array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
+ dtype='|S5')
+ >>> np.char.swapcase(c)
+ array(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'swapcase')
+
+def title(a):
+ """
+ For each element in `a`, return a titlecased version of the
+ string: words start with uppercase characters, all remaining cased
+ characters are lowercase.
+
+ Calls `str.title` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.title
+
+ Examples
+ --------
+ >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c
+ array(['a1b c', '1b ca', 'b ca1', 'ca1b'],
+ dtype='|S5')
+ >>> np.char.title(c)
+ array(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'title')
+
+def translate(a, table, deletechars=None):
+ """
+ For each element in `a`, return a copy of the string where all
+ characters occurring in the optional argument `deletechars` are
+ removed, and the remaining characters have been mapped through the
+ given translation table.
+
+ Calls `str.translate` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ table : str of length 256
+ deletechars : str
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.translate
+ """
+ a_arr = numpy.asarray(a)
+ if issubclass(a_arr.dtype.type, unicode_):
+ return _vec_string(
+ a_arr, a_arr.dtype, 'translate', (table,))
+ else:
+ return _vec_string(
+ a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars))
+
+def upper(a):
+ """
+ Return an array with the elements of `a` converted to uppercase.
+
+ Calls `str.upper` element-wise.
+
+ For 8-bit strings, this method is locale-dependent.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.upper
+
+ Examples
+ --------
+ >>> c = np.array(['a1b c', '1bca', 'bca1']); c
+ array(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+ >>> np.char.upper(c)
+ array(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+ """
+ a_arr = numpy.asarray(a)
+ return _vec_string(a_arr, a_arr.dtype, 'upper')
+
+def zfill(a, width):
+ """
+ Return the numeric string left-filled with zeros in a string of
+ length `width`.
+
+ Calls `str.zfill` element-wise.
+
+ Parameters
+ ----------
+ a : array-like of str or unicode
+ width : int
+
+ Returns
+ -------
+ out : ndarray
+ Output array of str or unicode, depending on input type
+
+ See also
+ --------
+ str.zfill
+ """
+ a_arr = numpy.asarray(a)
+ width_arr = numpy.asarray(width)
+ size = long(numpy.max(width_arr.flat))
+ return _vec_string(
+ a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,))
+
+def isnumeric(a):
+ """
+ For each element in `a`, return True if there are only numeric
+ characters in the element.
+
+ Calls `unicode.isnumeric` element-wise.
+
+ Numeric characters include digit characters, and all characters
+ that have the Unicode numeric value property, e.g. ``U+2155,
+ VULGAR FRACTION ONE FIFTH``.
+
+ Parameters
+ ----------
+ a : array-like of unicode
+
+ Returns
+ -------
+ out : ndarray
+ Array of booleans
+
+ See also
+ --------
+ unicode.isnumeric
+ """
+ if _use_unicode(a) != unicode_:
+ raise TypeError, "isnumeric is only available for Unicode strings and arrays"
+ return _vec_string(a, bool_, 'isnumeric')
+
+def isdecimal(a):
+    """
+    For each element in `a`, return True if there are only decimal
+    characters in the element.
+
+    Calls `unicode.isdecimal` element-wise.
+
+    Decimal characters include digit characters, and all characters
+    that can be used to form decimal-radix numbers,
+    e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``.
+
+    Parameters
+    ----------
+    a : array-like of unicode
+
+    Returns
+    -------
+    out : ndarray
+        Array of booleans
+
+    Raises
+    ------
+    TypeError
+        If `a` is not a Unicode string or array.
+
+    See also
+    --------
+    unicode.isdecimal
+    """
+    # Only unicode objects provide an ``isdecimal`` method, so reject
+    # 8-bit string input up front with an error naming this function.
+    if _use_unicode(a) != unicode_:
+        raise TypeError, "isdecimal is only available for Unicode strings and arrays"
+    return _vec_string(a, bool_, 'isdecimal')
-# It also strips white-space on element retrieval and on
-# comparisons
class chararray(ndarray):
"""
chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
strides=None, order=None)
- An array of fixed size (perhaps unicode) strings.
+ Provides a convenient view on arrays of string and unicode values.
.. note::
- The chararray module exists for backwards compatibility with Numarray,
- it is not recommended for new development. If one needs arrays of
- strings, use arrays of `dtype` object.
+ This class is provided for numarray backward-compatibility.
+ New code (not concerned with numarray compatibility) should use
+ arrays of type object_, string_ or unicode_ and use the free
+ functions in :mod:`numpy.char <numpy.core.defchararray>` for
+ fast vectorized string operations instead.
+
+ Versus a regular Numpy array of type 'string_' or 'unicode_', this
+ class adds the following functionality:
- Create the array, using `buffer` (with `offset` and `strides`) if it is
- not ``None``. If `buffer` is ``None``, then construct a new array with
- `strides` in "C order," unless both ``len(shape) >= 2`` and
- ``order='Fortran'``, in which case `strides` is in "Fortran order."
+
+ 1) values automatically have whitespace removed from the end
+ when indexed
+
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
+
+ 3) vectorized string operations are provided as methods
+ (e.g. `.endswith`) and infix operators (e.g. +, *, %)
+
+ chararrays should be created using `numpy.char.array
+ <numpy.core.defchararray.array>` or `numpy.char.asarray
+ <numpy.core.defchararray.asarray>`, rather than this constructor
+ directly.
+
+ Create the array, using `buffer` (with `offset` and `strides`) if
+ it is not ``None``. If `buffer` is ``None``, then construct a new
+ array with `strides` in "C order," unless both ``len(shape) >= 2``
+ and ``order='Fortran'``, in which case `strides` is in "Fortran
+ order."
Parameters
----------
shape : tuple
- Shape of the array.
+ Shape of the array.
itemsize : int_like, > 0, optional
- Length of each array element, in number of characters. Default is 1.
+ Length of each array element, in number of characters. Default is 1.
unicode : {True, False}, optional
- Are the array elements of unicode-type (``True``) or string-type
- (``False``, the default).
+ Are the array elements of unicode-type (``True``) or string-type
+ (``False``, the default).
buffer : integer, > 0, optional
- Memory address of the start of the array data. If ``None`` (the
- default), a new array is created.
+ Memory address of the start of the array data. If ``None``
+ (the default), a new array is created.
offset : integer, >= 0, optional
- Fixed stride displacement from the beginning of an axis? Default is
- 0.
+ Fixed stride displacement from the beginning of an axis?
+ Default is 0.
strides : array_like(?), optional
- Strides for the array (see `numpy.ndarray.strides` for full
- description), default is ``None``.
+ Strides for the array (see `numpy.ndarray.strides` for full
+ description), default is ``None``.
order : {'C', 'F'}, optional
- The order in which the array data is stored in memory: 'C' -> "row
- major" order (the default), 'F' -> "column major" (Fortran) order
+ The order in which the array data is stored in memory: 'C' ->
+ "row major" order (the default), 'F' -> "column major"
+ (Fortran) order.
Examples
--------
@@ -87,7 +1684,6 @@ class chararray(ndarray):
['abc', 'abc', 'abc'],
['abc', 'abc', 'abc']],
dtype='|S5')
-
"""
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
offset=0, strides=None, order='C'):
@@ -98,6 +1694,11 @@ class chararray(ndarray):
else:
dtype = string_
+ # force itemsize to be a Python long, since using Numpy integer
+ # types results in itemsize.itemsize being used as the size of
+ # strings in the new array.
+ itemsize = long(itemsize)
+
_globalvar = 1
if buffer is None:
self = ndarray.__new__(subtype, shape, (dtype, itemsize),
@@ -117,87 +1718,134 @@ class chararray(ndarray):
def __getitem__(self, obj):
val = ndarray.__getitem__(self, obj)
- if isinstance(val, (string_, unicode_)):
+ if issubclass(val.dtype.type, character):
temp = val.rstrip()
- if len(temp) == 0:
+ if _len(temp) == 0:
val = ''
else:
val = temp
return val
+ # IMPLEMENTATION NOTE: Most of the methods of this class are
+ # direct delegations to the free functions in this module.
+ # However, those that return an array of strings should instead
+ # return a chararray, so some extra wrapping is required.
+
def __eq__(self, other):
- return compare_chararrays(self, other, '==', True)
+ """
+ Return (self == other) element-wise.
+
+ See also
+ --------
+ equal
+ """
+ return equal(self, other)
def __ne__(self, other):
- return compare_chararrays(self, other, '!=', True)
+ """
+ Return (self != other) element-wise.
+
+ See also
+ --------
+ not_equal
+ """
+ return not_equal(self, other)
def __ge__(self, other):
- return compare_chararrays(self, other, '>=', True)
+ """
+ Return (self >= other) element-wise.
+
+ See also
+ --------
+ greater_equal
+ """
+ return greater_equal(self, other)
def __le__(self, other):
- return compare_chararrays(self, other, '<=', True)
+ """
+ Return (self <= other) element-wise.
+
+ See also
+ --------
+ less_equal
+ """
+ return less_equal(self, other)
def __gt__(self, other):
- return compare_chararrays(self, other, '>', True)
+ """
+ Return (self > other) element-wise.
+
+ See also
+ --------
+ greater
+ """
+ return greater(self, other)
def __lt__(self, other):
- return compare_chararrays(self, other, '<', True)
+ """
+ Return (self < other) element-wise.
+
+ See also
+ --------
+ less
+ """
+ return less(self, other)
def __add__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- outitem = self.itemsize + arr.itemsize
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = (val[0] + val[1])
- return result
+ """
+ Return (self + other), that is string concatenation,
+ element-wise for a pair of array_likes of string_ or unicode_.
+
+ See also
+ --------
+ add
+ """
+ return asarray(add(self, other))
def __radd__(self, other):
- b = broadcast(other, self)
- outitem = b.iters[0].base.itemsize + \
- b.iters[1].base.itemsize
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = (val[0] + val[1])
- return result
-
- def __mul__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- if not issubclass(arr.dtype.type, integer):
- raise ValueError, "Can only multiply by integers"
- outitem = b.iters[0].base.itemsize * arr.max()
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = val[0]*val[1]
- return result
-
- def __rmul__(self, other):
- b = broadcast(self, other)
- arr = b.iters[1].base
- if not issubclass(arr.dtype.type, integer):
- raise ValueError, "Can only multiply by integers"
- outitem = b.iters[0].base.itemsize * arr.max()
- result = chararray(b.shape, outitem, self.dtype is unicode_)
- res = result.flat
- for k, val in enumerate(b):
- res[k] = val[0]*val[1]
- return result
-
- def __mod__(self, other):
- b = broadcast(self, other)
- res = [None]*b.size
- maxsize = -1
- for k,val in enumerate(b):
- newval = val[0] % val[1]
- maxsize = max(len(newval), maxsize)
- res[k] = newval
- newarr = chararray(b.shape, maxsize, self.dtype is unicode_)
- newarr[:] = res
- return newarr
+ """
+ Return (other + self), that is string concatenation,
+ element-wise for a pair of array_likes of string_ or unicode_.
+
+ See also
+ --------
+ add
+ """
+ return asarray(add(numpy.asarray(other), self))
+
+ def __mul__(self, i):
+ """
+ Return (self * i), that is string multiple concatenation,
+ element-wise.
+
+ See also
+ --------
+ multiply
+ """
+ return asarray(multiply(self, i))
+
+ def __rmul__(self, i):
+ """
+ Return (i * self), that is string multiple concatenation,
+ element-wise.
+
+ See also
+ --------
+ multiply
+ """
+ return asarray(multiply(self, i))
+
+ def __mod__(self, i):
+ """
+ Return (self % i), that is pre-Python 2.6 string formatting
+ (interpolation), element-wise for a pair of array_likes of string_
+ or unicode_.
+
+ See also
+ --------
+ mod
+ """
+ return asarray(mod(self, i))
def __rmod__(self, other):
return NotImplemented
@@ -221,587 +1869,659 @@ class chararray(ndarray):
"""
return self.__array__().argsort(axis, kind, order)
-
- def _generalmethod(self, name, myiter):
- res = [None]*myiter.size
- maxsize = -1
- for k, val in enumerate(myiter):
- newval = []
- for chk in val[1:]:
- if not chk or (chk.dtype is object_ and chk.item() is None):
- break
- newval.append(chk)
- newitem = getattr(val[0],name)(*newval)
- maxsize = max(len(newitem), maxsize)
- res[k] = newitem
- newarr = chararray(myiter.shape, maxsize, self.dtype is unicode_)
- newarr[:] = res
- return newarr
-
- def _typedmethod(self, name, myiter, dtype):
- result = empty(myiter.shape, dtype=dtype)
- res = result.flat
- for k, val in enumerate(myiter):
- newval = []
- for chk in val[1:]:
- if not chk or (chk.dtype is object_ and chk.item() is None):
- break
- newval.append(chk)
- this_str = val[0].rstrip('\x00')
- newitem = getattr(this_str,name)(*newval)
- res[k] = newitem
- return result
-
- def _samemethod(self, name):
- result = self.copy()
- res = result.flat
- for k, val in enumerate(self.flat):
- res[k] = getattr(val, name)()
- return result
+ argsort.__doc__ = ndarray.argsort.__doc__
def capitalize(self):
"""
- Capitalize the first character of each array element.
-
- For each element of `self`, if the first character is a letter
- possessing both "upper-case" and "lower-case" forms, and it is
- presently in lower-case, change it to upper-case; otherwise, leave
- it untouched.
-
- Parameters
- ----------
- None
-
- Returns
- -------
- ret : chararray
- `self` with each element "title-cased."
+ Return a copy of `self` with only the first character of each element
+ capitalized.
- Examples
+ See also
--------
- >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4').view(np.chararray); c
- chararray(['a1b2', '1b2a', 'b2a1', '2a1b'],
- dtype='|S4')
- >>> c.capitalize()
- chararray(['A1b2', '1b2a', 'B2a1', '2a1b'],
- dtype='|S4')
-
+ capitalize
"""
- return self._samemethod('capitalize')
+ return asarray(capitalize(self))
- if sys.version[:3] >= '2.4':
+ if sys.version_info >= (2, 4):
def center(self, width, fillchar=' '):
- return self._generalmethod('center',
- broadcast(self, width, fillchar))
- def ljust(self, width, fillchar=' '):
- return self._generalmethod('ljust',
- broadcast(self, width, fillchar))
- def rjust(self, width, fillchar=' '):
- return self._generalmethod('rjust',
- broadcast(self, width, fillchar))
- def rsplit(self, sep=None, maxsplit=None):
- return self._typedmethod('rsplit', broadcast(self, sep, maxsplit),
- object)
+ """
+ Return a copy of `self` with its elements centered in a
+ string of length `width`.
+
+ See also
+ --------
+ center
+ """
+ return asarray(center(self, width, fillchar))
else:
- def ljust(self, width):
- return self._generalmethod('ljust', broadcast(self, width))
- def rjust(self, width):
- return self._generalmethod('rjust', broadcast(self, width))
def center(self, width):
- return self._generalmethod('center', broadcast(self, width))
+ """
+ Return a copy of `self` with its elements centered in a
+ string of length `width`.
- def count(self, sub, start=None, end=None):
- """
- Return the number of occurrences of a sub-string in each array element.
+ See also
+ --------
+ center
+ """
+ return asarray(center(self, width))
- Parameters
- ----------
- sub : string
- The sub-string to count.
- start : int, optional
- The string index at which to start counting in each element.
- end : int, optional
- The string index at which to end counting in each element.
-
- Returns
- -------
- ret : ndarray of ints
- Array whose elements are the number of occurrences of `sub` in each
- element of `self`.
+ def count(self, sub, start=0, end=None):
+ """
+ Returns an array with the number of non-overlapping occurrences of
+ substring `sub` in the range [`start`, `end`].
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.count('A')
- array([3, 1, 1])
- >>> c.count('aA')
- array([3, 1, 0])
- >>> c.count('A', start=1, end=4)
- array([2, 1, 1])
- >>> c.count('A', start=1, end=3)
- array([1, 0, 0])
-
+ count
"""
- return self._typedmethod('count', broadcast(self, sub, start, end), int)
+ return count(self, sub, start, end)
+
- def decode(self,encoding=None,errors=None):
+ def decode(self, encoding=None, errors=None):
"""
- Return elements decoded according to the value of `encoding`.
+ Calls `str.decode` element-wise.
- Parameters
- ----------
- encoding : string, optional
- The encoding to use; for a list of acceptable values, see the
- Python docstring for the package 'encodings'
- error : Python exception object?, optional
- The exception to raise if decoding fails?
+ See also
+ --------
+ decode
+ """
+ return decode(self, encoding, errors)
- Returns
- -------
- ret : chararray
- A view of `self`, suitably decoded.
+ def encode(self, encoding=None, errors=None):
+ """
+ Calls `str.encode` element-wise.
- See Also
+ See also
--------
encode
- encodings
- (package)
-
- Examples
- --------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c = c.encode(encoding='cp037'); c
- chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
- '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
- dtype='|S7')
- >>> c.decode(encoding='cp037')
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
-
"""
- return self._generalmethod('decode', broadcast(self, encoding, errors))
+ return encode(self, encoding, errors)
- def encode(self,encoding=None,errors=None):
+ def endswith(self, suffix, start=0, end=None):
"""
- Return elements encoded according to the value of `encoding`.
-
- Parameters
- ----------
- encoding : string, optional
- The encoding to use; for a list of acceptable values, see the
- Python docstring for `encodings`.
- error : Python exception object, optional
- The exception to raise if encoding fails.
+ Returns a boolean array which is `True` where the string element
+ in `self` ends with `suffix`, otherwise `False`.
- Returns
- -------
- ret : chararray
- A view of `self`, suitably encoded.
-
- See Also
- --------
- decode
-
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.encode(encoding='cp037')
- chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
- '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
- dtype='|S7')
-
+ endswith
"""
- return self._generalmethod('encode', broadcast(self, encoding, errors))
+ return endswith(self, suffix, start, end)
- def endswith(self, suffix, start=None, end=None):
+ def expandtabs(self, tabsize=8):
"""
- Check whether elements end with specified suffix
-
- Given an array of strings, return a new bool array of same shape with
- the result of comparing suffix against each element; each element
- of bool array is ``True`` if element ends with specified suffix and
- ``False`` otherwise.
+ Return a copy of each string element where all tab characters are
+ replaced by one or more spaces.
- Parameters
- ----------
- suffix : string
- Compare each element in array to this.
- start : int, optional
- For each element, start searching from this position.
- end : int, optional
- For each element, stop comparing at this position.
+ See also
+ --------
+ expandtabs
+ """
+ return asarray(expandtabs(self, tabsize))
- Returns
- -------
- endswith : ndarray
- Output array of bools
+ def find(self, sub, start=0, end=None):
+ """
+ For each element, return the lowest index in the string where
+ substring `sub` is found.
- See Also
+ See also
--------
- count
find
- index
- startswith
-
- Examples
- --------
- >>> s = chararray(3, itemsize=3)
- >>> s[0] = 'foo'
- >>> s[1] = 'bar'
- >>> s
- chararray(['foo', 'bar'],
- dtype='|S3')
- >>> s.endswith('ar')
- array([False, True], dtype=bool)
- >>> s.endswith('a', start=1, end=2)
- array([False, True], dtype=bool)
-
"""
- return self._typedmethod('endswith', broadcast(self, suffix, start, end), bool)
-
- def expandtabs(self, tabsize=None):
- return self._generalmethod('endswith', broadcast(self, tabsize))
-
- def find(self, sub, start=None, end=None):
- return self._typedmethod('find', broadcast(self, sub, start, end), int)
+ return find(self, sub, start, end)
- def index(self, sub, start=None, end=None):
- return self._typedmethod('index', broadcast(self, sub, start, end), int)
+ def index(self, sub, start=0, end=None):
+ """
+ Like `find`, but raises `ValueError` when the substring is not found.
- def _ismethod(self, name):
- result = empty(self.shape, dtype=bool)
- res = result.flat
- for k, val in enumerate(self.flat):
- item = val.rstrip('\x00')
- res[k] = getattr(item, name)()
- return result
+ See also
+ --------
+ index
+ """
+ return index(self, sub, start, end)
def isalnum(self):
- return self._ismethod('isalnum')
+ """
+ Returns true for each element if all characters in the string
+ are alphanumeric and there is at least one character, false
+ otherwise.
+
+ See also
+ --------
+ isalnum
+ """
+ return isalnum(self)
def isalpha(self):
- return self._ismethod('isalpha')
+ """
+ Returns true for each element if all characters in the string
+ are alphabetic and there is at least one character, false
+ otherwise.
+
+ See also
+ --------
+ isalpha
+ """
+ return isalpha(self)
def isdigit(self):
- return self._ismethod('isdigit')
+ """
+ Returns true for each element if all characters in the string are
+ digits and there is at least one character, false otherwise.
+
+ See also
+ --------
+ isdigit
+ """
+ return isdigit(self)
def islower(self):
- return self._ismethod('islower')
+ """
+ Returns true for each element if all cased characters in the
+ string are lowercase and there is at least one cased character,
+ false otherwise.
+
+ See also
+ --------
+ islower
+ """
+ return islower(self)
def isspace(self):
- return self._ismethod('isspace')
+ """
+ Returns true for each element if there are only whitespace
+ characters in the string and there is at least one character,
+ false otherwise.
+
+ See also
+ --------
+ isspace
+ """
+ return isspace(self)
def istitle(self):
- return self._ismethod('istitle')
+ """
+ Returns true for each element if the element is a titlecased
+ string and there is at least one character, false otherwise.
+
+ See also
+ --------
+ istitle
+ """
+ return istitle(self)
def isupper(self):
- return self._ismethod('isupper')
+ """
+ Returns true for each element if all cased characters in the
+ string are uppercase and there is at least one cased character,
+ false otherwise.
- def join(self, seq):
- return self._generalmethod('join', broadcast(self, seq))
+ See also
+ --------
+ isupper
+ """
+ return isupper(self)
- def lower(self):
+ def join(self, seq):
"""
- Assure that every character of each array element is lower-case.
+ Return a string which is the concatenation of the strings in the
+ sequence `seq`.
- For each character possessing both "upper-case" and "lower-case" forms,
- if it is in upper-case, change it to lower; otherwise, leave it unchanged.
+ See also
+ --------
+ join
+ """
+ return join(self, seq)
- Parameters
- ----------
- None
+ if sys.version_info >= (2, 4):
+ def ljust(self, width, fillchar=' '):
+ """
+ Return an array with the elements of `self` left-justified in a
+ string of length `width`.
+
+ See also
+ --------
+ ljust
+ """
+ return asarray(ljust(self, width, fillchar))
+ else:
+ def ljust(self, width):
+ """
+ Return an array with the elements of `self` left-justified in a
+ string of length `width`.
- Returns
- -------
- ret : chararray
- `self` with all capital letters changed to lower-case.
+ See also
+ --------
+ ljust
+ """
+ return asarray(ljust(self, width))
- Examples
+ def lower(self):
+ """
+ Return an array with the elements of `self` converted to
+ lowercase.
+ See also
--------
- >>> c = np.array(['A1B C', '1BCA', 'BCA1']).view(np.chararray); c
- chararray(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
- >>> c.lower()
- chararray(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
-
+ lower
"""
- return self._samemethod('lower')
+ return asarray(lower(self))
- def lstrip(self, chars):
+ def lstrip(self, chars=None):
"""
- Remove leading characters from each element.
+ For each element in `self`, return a copy with the leading characters
+ removed.
- Returns a view of ``self`` with `chars` stripped from the start of
- each element. Note: **No Default** - `chars` must be specified (but if
- it is explicitly ``None`` or the empty string '', leading whitespace is
- removed).
+ See also
+ --------
+ lstrip
+ """
+ return asarray(lstrip(self, chars))
- Parameters
- ----------
- chars : string_like or None
- Character(s) to strip; whitespace stripped if `chars` == ``None``
- or `chars` == ''.
+ if sys.version_info >= (2, 5):
+ def partition(self, sep):
+ """
+ Partition each element in `self` around `sep`.
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ See also
+ --------
+ partition
+ """
+ return asarray(partition(self, sep))
- Raises
- ------
- TypeError: lstrip() takes exactly 2 arguments (1 given)
- If `chars` is not supplied.
+ def replace(self, old, new, count=None):
+ """
+ For each element in `self`, return a copy of the string with all
+ occurrences of substring `old` replaced by `new`.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.lstrip('a') # 'a' unstripped from c[1] because whitespace leading
- chararray(['AaAaA', ' aA', 'bBABba'],
- dtype='|S6')
- >>> c.lstrip('A') # leaves c unchanged
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> (c.lstrip(' ') == c.lstrip('')).all()
- True
- >>> (c.lstrip(' ') == c.lstrip(None)).all()
- True
-
+ replace
"""
- return self._generalmethod('lstrip', broadcast(self, chars))
+ return asarray(replace(self, old, new, count))
- def replace(self, old, new, count=None):
- return self._generalmethod('replace', broadcast(self, old, new, count))
+ def rfind(self, sub, start=0, end=None):
+ """
+ For each element in `self`, return the highest index in the string
+ where substring `sub` is found, such that `sub` is contained
+ within [`start`, `end`].
- def rfind(self, sub, start=None, end=None):
- return self._typedmethod('rfind', broadcast(self, sub, start, end), int)
+ See also
+ --------
+ rfind
+ """
+ return rfind(self, sub, start, end)
- def rindex(self, sub, start=None, end=None):
- return self._typedmethod('rindex', broadcast(self, sub, start, end), int)
+ def rindex(self, sub, start=0, end=None):
+ """
+ Like `rfind`, but raises `ValueError` when the substring `sub` is
+ not found.
- def rstrip(self, chars=None):
+ See also
+ --------
+ rindex
"""
- Remove trailing characters.
+ return rindex(self, sub, start, end)
- Returns a view of ``self`` with `chars` stripped from the end of each
- element.
+ if sys.version_info >= (2, 4):
+ def rjust(self, width, fillchar=' '):
+ """
+ Return an array with the elements of `self`
+ right-justified in a string of length `width`.
+
+ See also
+ --------
+ rjust
+ """
+ return asarray(rjust(self, width, fillchar))
+ else:
+ def rjust(self, width):
+ """
+ Return an array with the elements of `self`
+ right-justified in a string of length `width`.
+
+ See also
+ --------
+ rjust
+ """
+ return asarray(rjust(self, width))
+
+ if sys.version_info >= (2, 5):
+ def rpartition(self, sep):
+ """
+ Partition each element in `self` around the last occurrence of `sep`.
+
+ See also
+ --------
+ rpartition
+ """
+ return asarray(rpartition(self, sep))
+
+ if sys.version_info >= (2, 4):
+ def rsplit(self, sep=None, maxsplit=None):
+ """
+ For each element in `self`, return a list of the words in
+ the string, using `sep` as the delimiter string.
- Parameters
- ----------
- chars : string_like, optional, default=None
- Character(s) to remove.
+ See also
+ --------
+ rsplit
+ """
+ return rsplit(self, sep, maxsplit)
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ def rstrip(self, chars=None):
+ """
+ For each element in `self`, return a copy with the trailing
+ characters removed.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7').view(np.chararray); c
- chararray(['aAaAaA', 'abBABba'],
- dtype='|S7')
- >>> c.rstrip('a')
- chararray(['aAaAaA', 'abBABb'],
- dtype='|S6')
- >>> c.rstrip('A')
- chararray(['aAaAa', 'abBABba'],
- dtype='|S7')
-
+ rstrip
"""
- return self._generalmethod('rstrip', broadcast(self, chars))
+ return asarray(rstrip(self, chars))
def split(self, sep=None, maxsplit=None):
- return self._typedmethod('split', broadcast(self, sep, maxsplit), object)
+ """
+ For each element in `self`, return a list of the words in the
+ string, using `sep` as the delimiter string.
- def splitlines(self, keepends=None):
- return self._typedmethod('splitlines', broadcast(self, keepends), object)
+ See also
+ --------
+ split
+ """
+ return split(self, sep, maxsplit)
- def startswith(self, prefix, start=None, end=None):
- return self._typedmethod('startswith', broadcast(self, prefix, start, end), bool)
+ def splitlines(self, keepends=None):
+ """
+ For each element in `self`, return a list of the lines in the
+ element, breaking at line boundaries.
- def strip(self, chars=None):
+ See also
+ --------
+ splitlines
"""
- Remove leading and trailing characters, whitespace by default.
+ return splitlines(self, keepends)
- Returns a view of ``self`` with `chars` stripped from the start and end of
- each element; by default leading and trailing whitespace is removed.
+ def startswith(self, prefix, start=0, end=None):
+ """
+ Returns a boolean array which is `True` where the string element
+ in `self` starts with `prefix`, otherwise `False`.
- Parameters
- ----------
- chars : string_like, optional, default=None
- Character(s) to strip; whitespace by default.
+ See also
+ --------
+ startswith
+ """
+ return startswith(self, prefix, start, end)
- Returns
- -------
- ret : chararray
- View of ``self``, each element suitably stripped.
+ def strip(self, chars=None):
+ """
+ For each element in `self`, return a copy with the leading and
+ trailing characters removed.
- Examples
+ See also
--------
- >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
- >>> c
- chararray(['aAaAaA', ' aA', 'abBABba'],
- dtype='|S7')
- >>> c.strip()
- chararray(['aAaAaA', 'aA', 'abBABba'],
- dtype='|S7')
- >>> c.strip('a') # 'a' unstripped from c[1] because whitespace leads
- chararray(['AaAaA', ' aA', 'bBABb'],
- dtype='|S6')
- >>> c.strip('A') # 'A' unstripped from c[1] because (unprinted) ws trails
- chararray(['aAaAa', ' aA', 'abBABba'],
- dtype='|S7')
-
+ strip
"""
- return self._generalmethod('strip', broadcast(self, chars))
+ return asarray(strip(self, chars))
def swapcase(self):
"""
- Switch upper-case letters to lower-case, and vice-versa.
+ For each element in `self`, return a copy of the string with
+ uppercase characters converted to lowercase and vice versa.
- Parameters
- ----------
- None
+ See also
+ --------
+ swapcase
+ """
+ return asarray(swapcase(self))
- Returns
- -------
- ret : chararray
- `self` with all lower-case letters capitalized and all upper-case
- changed to lower case.
+ def title(self):
+ """
+ For each element in `self`, return a titlecased version of the
+ string: words start with uppercase characters, all remaining cased
+ characters are lowercase.
- Examples
+ See also
--------
- >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5').view(np.chararray);c
- chararray(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
- dtype='|S5')
- >>> c.swapcase()
- chararray(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
- dtype='|S5')
+ title
+ """
+ return asarray(title(self))
+ def translate(self, table, deletechars=None):
"""
- return self._samemethod('swapcase')
+ For each element in `self`, return a copy of the string where
+ all characters occurring in the optional argument
+ `deletechars` are removed, and the remaining characters have
+ been mapped through the given translation table.
- def title(self):
+ See also
+ --------
+ translate
"""
- Capitalize the first character of each array element.
+ return asarray(translate(self, table, deletechars))
- For each element of `self`, if the first character is a letter
- possessing both "upper-case" and "lower-case" forms, and it is
- presently in lower-case, change it to upper-case; otherwise, leave
- it untouched.
+ def upper(self):
+ """
+ Return an array with the elements of `self` converted to
+ uppercase.
- Parameters
- ----------
- None
+ See also
+ --------
+ upper
+ """
+ return asarray(upper(self))
- Returns
- -------
- ret : chararray
- `self` with
+ def zfill(self, width):
+ """
+ Return the numeric string left-filled with zeros in a string of
+ length `width`.
- Examples
+ See also
--------
- >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5').view(np.chararray);c
- chararray(['a1b c', '1b ca', 'b ca1', 'ca1b'],
- dtype='|S5')
- >>> c.title()
- chararray(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
- dtype='|S5')
+ zfill
+ """
+ return asarray(zfill(self, width))
+ def isnumeric(self):
"""
- return self._samemethod('title')
+ For each element in `self`, return True if there are only
+ numeric characters in the element.
- def translate(self, table, deletechars=None):
- if self.dtype is unicode_:
- return self._generalmethod('translate', broadcast(self, table))
- else:
- return self._generalmethod('translate', broadcast(self, table, deletechars))
+ See also
+ --------
+ isnumeric
+ """
+ return isnumeric(self)
- def upper(self):
+ def isdecimal(self):
"""
- Capitalize every character of each array element.
+ For each element in `self`, return True if there are only
+ decimal characters in the element.
- For each character possessing both "upper-case" and "lower-case" forms,
- if it is in lower-case, change it to upper; otherwise, leave it unchanged.
+ See also
+ --------
+ isdecimal
+ """
+ return isdecimal(self)
- Parameters
- ----------
- None
- Returns
- -------
- ret : chararray
- `self` with all characters capitalized.
+def array(obj, itemsize=None, copy=True, unicode=None, order=None):
+ """
+ Create a `chararray`.
- Examples
- --------
- >>> c = np.array(['a1b c', '1bca', 'bca1']).view(np.chararray); c
- chararray(['a1b c', '1bca', 'bca1'],
- dtype='|S5')
- >>> c.upper()
- chararray(['A1B C', '1BCA', 'BCA1'],
- dtype='|S5')
+ .. note::
+ This class is provided for numarray backward-compatibility.
+ New code (not concerned with numarray compatibility) should use
+ arrays of type object_, string_ or unicode_ and use the free
+ functions in :mod:`numpy.char <numpy.core.defchararray>` for
+ fast vectorized string operations instead.
- """
- return self._samemethod('upper')
+ Versus a regular Numpy array of type `string_` or `unicode_`, this
+ class adds the following functionality:
- def zfill(self, width):
- return self._generalmethod('zfill', broadcast(self, width))
+ 1) values automatically have whitespace removed from the end
+ when indexed
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
-def array(obj, itemsize=None, copy=True, unicode=False, order=None):
+ 3) vectorized string operations are provided as methods
+ (e.g. `str.endswith`) and infix operators (e.g. +, *, %)
+
+ Parameters
+ ----------
+ obj : array of str or unicode-like
+
+ itemsize : int, optional
+ `itemsize` is the number of characters per scalar in the
+ resulting array. If `itemsize` is None, and `obj` is an
+ object array or a Python list, the `itemsize` will be
+ automatically determined. If `itemsize` is provided and `obj`
+ is of type `str` or `unicode`, then the `obj` string will be
+ chunked into `itemsize` pieces.
+
+ copy : bool, optional
+ If true (default), then the object is copied. Otherwise, a copy
+ will only be made if __array__ returns a copy, if obj is a
+ nested sequence, or if a copy is needed to satisfy any of the other
+ requirements (`itemsize`, `unicode`, `order`, etc.).
+
+ unicode : bool, optional
+ When true, the resulting `chararray` can contain Unicode
+ characters, when false only 8-bit characters. If `unicode` is
+ `None` and `obj` is one of the following:
+
+ - a `chararray`,
+ - an ndarray of type `string_` or `unicode_`
+ - a Python str or unicode object,
+
+ then the unicode setting of the output array will be
+ automatically determined.
+
+ order : {'C', 'F', 'A'}, optional
+ Specify the order of the array. If order is 'C' (default), then the
+ array will be in C-contiguous order (last-index varies the
+ fastest). If order is 'F', then the returned array
+ will be in Fortran-contiguous order (first-index varies the
+ fastest). If order is 'A', then the returned array may
+ be in any order (either C-, Fortran-contiguous, or even
+ discontiguous).
+ """
+ if isinstance(obj, (str, _unicode)):
+ if unicode is None:
+ if isinstance(obj, _unicode):
+ unicode = True
+ else:
+ unicode = False
+ if itemsize is None:
+ itemsize = _len(obj)
+ shape = _len(obj) / itemsize
+ return chararray(shape, itemsize=itemsize, unicode=unicode,
+ buffer=obj, order=order)
+
+ if isinstance(obj, (list, tuple)):
+ obj = numpy.asarray(obj)
+
+ if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character):
+ # If we just have a vanilla chararray, create a chararray
+ # view around it.
+ if not isinstance(obj, chararray):
+ obj = obj.view(chararray)
- if isinstance(obj, chararray):
if itemsize is None:
itemsize = obj.itemsize
- if copy or (itemsize != obj.itemsize) \
- or (not unicode and obj.dtype == unicode_) \
- or (unicode and obj.dtype == string_):
- return obj.astype("%s%d" % (obj.dtype.char, itemsize))
+ # itemsize is in 8-bit chars, so for Unicode, we need
+ # to divide by the size of a single Unicode character,
+ # which for Numpy is always 4
+ if issubclass(obj.dtype.type, unicode_):
+ itemsize /= 4
+
+ if unicode is None:
+ if issubclass(obj.dtype.type, unicode_):
+ unicode = True
+ else:
+ unicode = False
+
+ if unicode:
+ dtype = unicode_
else:
- return obj
+ dtype = string_
- if isinstance(obj, ndarray) and (obj.dtype in [unicode_, string_]):
- new = obj.view(chararray)
- if unicode and obj.dtype == string_:
- return new.astype((unicode_, obj.itemsize))
- elif obj.dtype == unicode_:
- return new.astype((string_, obj.itemsize))
+ if order is not None:
+ obj = numpy.asarray(obj, order=order)
+ if (copy
+ or (itemsize != obj.itemsize)
+ or (not unicode and isinstance(obj, unicode_))
+ or (unicode and isinstance(obj, string_))):
+ obj = obj.astype((dtype, long(itemsize)))
+ return obj
- if copy: return new.copy()
- else: return new
+ if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object):
+ if itemsize is None:
+ # Since no itemsize was specified, convert the input array to
+ # a list so the ndarray constructor will automatically
+ # determine the itemsize for us.
+ obj = obj.tolist()
+ # Fall through to the default case
+
+ if unicode:
+ dtype = unicode_
+ else:
+ dtype = string_
- if unicode: dtype = "U"
- else: dtype = "S"
+ if itemsize is None:
+ val = narray(obj, dtype=dtype, order=order, subok=True)
+ else:
+ val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True)
+ return val.view(chararray)
- if itemsize is not None:
- dtype += str(itemsize)
- if isinstance(obj, (str, _unicode)):
- if itemsize is None:
- itemsize = len(obj)
- shape = len(obj) / itemsize
- return chararray(shape, itemsize=itemsize, unicode=unicode,
- buffer=obj)
+def asarray(obj, itemsize=None, unicode=None, order=None):
+ """
+ Convert the input to a `chararray`, copying the data only if
+ necessary.
- # default
- val = narray(obj, dtype=dtype, order=order, subok=1)
+ Versus a regular Numpy array of type `string_` or `unicode_`, this
+ class adds the following functionality:
- return val.view(chararray)
+ 1) values automatically have whitespace removed from the end
+ when indexed
+
+ 2) comparison operators automatically remove whitespace from the
+ end when comparing values
+
+ 3) vectorized string operations are provided as methods
+ (e.g. `str.endswith`) and infix operators (e.g. +, *, %)
+
+ Parameters
+ ----------
+ obj : array of str or unicode-like
-def asarray(obj, itemsize=None, unicode=False, order=None):
+ itemsize : int, optional
+ `itemsize` is the number of characters per scalar in the
+ resulting array. If `itemsize` is None, and `obj` is an
+ object array or a Python list, the `itemsize` will be
+ automatically determined. If `itemsize` is provided and `obj`
+ is of type `str` or `unicode`, then the `obj` string will be
+ chunked into `itemsize` pieces.
+
+ unicode : bool, optional
+ When true, the resulting `chararray` can contain Unicode
+ characters, when false only 8-bit characters. If `unicode` is
+ `None` and `obj` is one of the following:
+
+ - a `chararray`,
+ - an ndarray of type `string_` or `unicode_`
+ - a Python str or unicode object,
+
+ then the unicode setting of the output array will be
+ automatically determined.
+
+ order : {'C', 'F'}, optional
+ Specify the order of the array. If order is 'C' (default), then the
+ array will be in C-contiguous order (last-index varies the
+ fastest). If order is 'F', then the returned array
+ will be in Fortran-contiguous order (first-index varies the
+ fastest).
+ """
return array(obj, itemsize, copy=False,
unicode=unicode, order=order)
diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src
index 1021f8d7f..9cff6836e 100644
--- a/numpy/core/src/multiarray/arraytypes.c.src
+++ b/numpy/core/src/multiarray/arraytypes.c.src
@@ -1333,7 +1333,7 @@ static void
* longlong, ulonglong, float, double, longdouble,
* cfloat, cdouble, clongdouble, char, char, char, datetime,
* timedelta#
- * #skip = 1*17, aip->descr->elsize*3, 1*2#
+ * #skip = 1*17, aop->descr->elsize*3, 1*2#
*/
static void
OBJECT_to_@TOTYPE@(PyObject **ip, @totype@ *op, intp n,
@@ -1389,7 +1389,10 @@ static void
return;
}
}
- @to@_setitem(temp,(char *)op, aop);
+ if (@to@_setitem(temp,(char *)op, aop)) {
+ Py_DECREF(temp);
+ return;
+ }
Py_DECREF(temp);
}
}
@@ -1417,7 +1420,10 @@ static void
Py_INCREF(Py_False);
temp = Py_False;
}
- @to@_setitem(temp,(char *)op, aop);
+ if (@to@_setitem(temp,(char *)op, aop)) {
+ Py_DECREF(temp);
+ return;
+ }
Py_DECREF(temp);
}
}
@@ -3146,7 +3152,7 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = {
/**end repeat**/
static void
-_init_datetime_descr(PyArray_Descr *descr)
+_init_datetime_descr(PyArray_Descr *descr)
{
PyArray_DatetimeMetaData *dt_data;
PyObject *cobj;
@@ -3156,12 +3162,12 @@ _init_datetime_descr(PyArray_Descr *descr)
dt_data->num = 1;
dt_data->den = 1;
dt_data->events = 1;
-
+
cobj = PyCObject_FromVoidPtr((void *)dt_data, _pya_free);
descr->metadata = PyDict_New();
PyDict_SetItemString(descr->metadata, NPY_METADATA_DTSTR, cobj);
Py_DECREF(cobj);
-
+
}
#define _MAX_LETTER 128
@@ -3251,7 +3257,7 @@ PyArray_DescrFromType(int type)
_init_datetime_descr(ret);
}
}
-
+
return ret;
}
diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c
index ae1d264f7..80b676a38 100644
--- a/numpy/core/src/multiarray/common.c
+++ b/numpy/core/src/multiarray/common.c
@@ -8,7 +8,7 @@
#include "npy_config.h"
#include "usertypes.h"
-
+
#include "common.h"
/*
@@ -66,6 +66,10 @@ _array_small_type(PyArray_Descr *chktype, PyArray_Descr* mintype)
mintype->type_num == PyArray_STRING) {
testsize = MAX(chksize, 4*minsize);
}
+ else if (chktype->type_num == PyArray_STRING &&
+ mintype->type_num == PyArray_UNICODE) {
+ testsize = MAX(chksize*4, minsize);
+ }
else {
testsize = MAX(chksize, minsize);
}
diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c
index 17c72aa6b..30def71e3 100644
--- a/numpy/core/src/multiarray/multiarraymodule.c
+++ b/numpy/core/src/multiarray/multiarraymodule.c
@@ -1796,7 +1796,7 @@ static PyObject *array_correlate(PyObject *NPY_UNUSED(dummy), PyObject *args, Py
}
static PyObject*
-array_correlate2(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
+array_correlate2(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
{
PyObject *shape, *a0;
int mode = 0;
@@ -1979,10 +1979,10 @@ array_set_datetimeparse_function(PyObject *NPY_UNUSED(dummy), PyObject *args, Py
PyErr_SetString(PyExc_TypeError, "Argument must be callable.");
return NULL;
}
- Py_INCREF(op);
+ Py_INCREF(op);
}
PyArray_SetDatetimeParseFunction(op);
- Py_DECREF(op);
+ Py_DECREF(op);
Py_INCREF(Py_None);
return Py_None;
}
@@ -2316,6 +2316,221 @@ compare_chararrays(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
return NULL;
}
+/*
+ * Call the string method `method` once per element of `char_array`
+ * broadcast against every item of the sequence `args`, and store each
+ * return value in a newly allocated array of dtype `type`.
+ *
+ * Parameters:
+ *   char_array - string/unicode array supplying the first call argument
+ *                (borrowed reference).
+ *   type       - descriptor of the result array.  The reference is always
+ *                consumed: it is handed to PyArray_SimpleNewFromDescr, or
+ *                released here if an error occurs before that call.
+ *   method     - unbound str/unicode method to call (borrowed reference).
+ *   args       - sequence of extra arguments to broadcast (borrowed).
+ *
+ * Returns a new reference to the result array, or NULL with an exception
+ * set.
+ */
+static PyObject *
+_vec_string_with_args(PyArrayObject* char_array, PyArray_Descr* type,
+                      PyObject* method, PyObject* args)
+{
+    PyObject* broadcast_args[NPY_MAXARGS];
+    PyArrayMultiIterObject* in_iter = NULL;
+    PyArrayObject* result = NULL;
+    PyArrayIterObject* out_iter = NULL;
+    PyObject* args_tuple = NULL;
+    PyObject* item_result = NULL;
+    Py_ssize_t i, n, nargs;
+    /* Number of owned references stored in broadcast_args[1..nheld] */
+    Py_ssize_t nheld = 0;
+
+    /*
+     * Check for failure *before* adding 1 for char_array; afterwards the
+     * -1 error marker would have become 0 and gone unnoticed.
+     */
+    nargs = PySequence_Size(args);
+    if (nargs == -1) {
+        goto err;
+    }
+    nargs += 1;
+    if (nargs > NPY_MAXARGS) {
+        PyErr_Format(PyExc_ValueError, "len(args) must be < %d", NPY_MAXARGS - 1);
+        goto err;
+    }
+
+    broadcast_args[0] = (PyObject*)char_array;
+    for (i = 1; i < nargs; i++) {
+        PyObject* item = PySequence_GetItem(args, i-1);
+        if (item == NULL) {
+            goto err;
+        }
+        /*
+         * Keep our own reference until the function returns: a generic
+         * sequence may create its items on the fly, so `args` alone does
+         * not guarantee that they stay alive.
+         */
+        broadcast_args[i] = item;
+        nheld = i;
+    }
+    in_iter = (PyArrayMultiIterObject*)PyArray_MultiIterFromObjects
+        (broadcast_args, nargs, 0);
+    if (in_iter == NULL) {
+        goto err;
+    }
+    n = in_iter->numiter;
+
+    result = (PyArrayObject*)PyArray_SimpleNewFromDescr
+        (in_iter->nd, in_iter->dimensions, type);
+    /* The descriptor reference was stolen, even if the call failed. */
+    type = NULL;
+    if (result == NULL) {
+        goto err;
+    }
+
+    out_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)result);
+    if (out_iter == NULL) {
+        goto err;
+    }
+
+    while (PyArray_MultiIter_NOTDONE(in_iter)) {
+        /*
+         * Build a fresh tuple for every call.  Re-filling one shared tuple
+         * with PyTuple_SetItem fails as soon as anything else holds a
+         * reference to it, and that failure would go undetected.
+         */
+        args_tuple = PyTuple_New(n);
+        if (args_tuple == NULL) {
+            goto err;
+        }
+        for (i = 0; i < n; i++) {
+            PyArrayIterObject* it = in_iter->iters[i];
+            PyObject* arg = PyArray_ToScalar(PyArray_ITER_DATA(it), it->ao);
+            if (arg == NULL) {
+                goto err;
+            }
+            PyTuple_SET_ITEM(args_tuple, i, arg); /* Steals ref to arg */
+        }
+
+        item_result = PyObject_CallObject(method, args_tuple);
+        Py_DECREF(args_tuple);
+        args_tuple = NULL;
+        if (item_result == NULL) {
+            goto err;
+        }
+
+        if (PyArray_SETITEM(result, PyArray_ITER_DATA(out_iter), item_result)) {
+            Py_DECREF(item_result);
+            PyErr_SetString(
+                PyExc_TypeError,
+                "result array type does not match underlying function");
+            goto err;
+        }
+        Py_DECREF(item_result);
+
+        PyArray_MultiIter_NEXT(in_iter);
+        PyArray_ITER_NEXT(out_iter);
+    }
+
+    while (nheld > 0) {
+        Py_DECREF(broadcast_args[nheld]);
+        nheld--;
+    }
+    Py_DECREF(in_iter);
+    Py_DECREF(out_iter);
+
+    return (PyObject*)result;
+
+ err:
+    while (nheld > 0) {
+        Py_DECREF(broadcast_args[nheld]);
+        nheld--;
+    }
+    /* Non-NULL only if we failed before the descriptor was handed over. */
+    Py_XDECREF(type);
+    Py_XDECREF(in_iter);
+    Py_XDECREF(out_iter);
+    Py_XDECREF(args_tuple);
+    Py_XDECREF(result);
+
+    return NULL;
+}
+
+/*
+ * Faster variant of _vec_string_with_args for string methods that take no
+ * additional arguments.  It walks `char_array` with a plain flat iterator,
+ * so no broadcast iterator is required (and broadcast iterators don't work
+ * with 1 argument anyway).
+ *
+ * Parameters:
+ *   char_array - string/unicode array whose elements are passed to
+ *                `method` one at a time (borrowed reference).
+ *   type       - descriptor of the result array.  The reference is always
+ *                consumed: it is handed to PyArray_SimpleNewFromDescr, or
+ *                released here if an error occurs before that call.
+ *   method     - unbound str/unicode method to call (borrowed reference).
+ *
+ * Returns a new reference to the result array, or NULL with an exception
+ * set.
+ */
+static PyObject *
+_vec_string_no_args(PyArrayObject* char_array,
+                    PyArray_Descr* type, PyObject* method)
+{
+    PyArrayIterObject* in_iter = NULL;
+    PyArrayObject* result = NULL;
+    PyArrayIterObject* out_iter = NULL;
+    PyObject* item = NULL;
+    PyObject* item_result = NULL;
+
+    in_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)char_array);
+    if (in_iter == NULL) {
+        goto err;
+    }
+
+    result = (PyArrayObject*)PyArray_SimpleNewFromDescr
+        (PyArray_NDIM(char_array), PyArray_DIMS(char_array), type);
+    /* The descriptor reference was stolen, even if the call failed. */
+    type = NULL;
+    if (result == NULL) {
+        goto err;
+    }
+
+    out_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)result);
+    if (out_iter == NULL) {
+        goto err;
+    }
+
+    while (PyArray_ITER_NOTDONE(in_iter)) {
+        item = PyArray_ToScalar(in_iter->dataptr, in_iter->ao);
+        if (item == NULL) {
+            goto err;
+        }
+
+        item_result = PyObject_CallFunctionObjArgs(method, item, NULL);
+        Py_DECREF(item);
+        if (item_result == NULL) {
+            goto err;
+        }
+
+        if (PyArray_SETITEM(result, PyArray_ITER_DATA(out_iter), item_result)) {
+            Py_DECREF(item_result);
+            PyErr_SetString(
+                PyExc_TypeError,
+                "result array type does not match underlying function");
+            goto err;
+        }
+        Py_DECREF(item_result);
+
+        PyArray_ITER_NEXT(in_iter);
+        PyArray_ITER_NEXT(out_iter);
+    }
+
+    Py_DECREF(in_iter);
+    Py_DECREF(out_iter);
+
+    return (PyObject*)result;
+
+ err:
+    /* Non-NULL only if we failed before the descriptor was handed over. */
+    Py_XDECREF(type);
+    Py_XDECREF(in_iter);
+    Py_XDECREF(out_iter);
+    Py_XDECREF(result);
+
+    return NULL;
+}
+
+/*
+ * Python entry point: _vec_string(a, dtype, method_name[, args])
+ *
+ * Looks up `method_name` on the str type (for NPY_STRING arrays) or the
+ * unicode type (for NPY_UNICODE arrays) and applies it to every element of
+ * `a`, collecting the return values in a new array of type `dtype`.  When
+ * `args` is omitted or is an empty sequence the no-argument fast path is
+ * used; otherwise `args` must be a sequence whose items are broadcast
+ * against `a`.
+ *
+ * Raises TypeError if `a` is not a string/unicode array or if `args` is
+ * not a sequence; errors from the attribute lookup and from the helper
+ * functions are propagated unchanged.
+ *
+ * `kwds` is accepted because the function is registered with
+ * METH_KEYWORDS, but it is not examined.
+ */
+static PyObject *
+_vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds)
+{
+    PyArrayObject* char_array = NULL;
+    PyArray_Descr *type = NULL;
+    PyObject* method_name;
+    PyObject* args_seq = NULL;
+
+    PyObject* method = NULL;
+    PyObject* result = NULL;
+
+    if (!PyArg_ParseTuple(args, "O&O&O|O",
+                          PyArray_Converter,
+                          &char_array,
+                          PyArray_DescrConverter,
+                          &type,
+                          &method_name,
+                          &args_seq)) {
+        goto err;
+    }
+
+    if (PyArray_TYPE(char_array) == NPY_STRING) {
+        method = PyObject_GetAttr((PyObject *)&PyString_Type, method_name);
+    } else if (PyArray_TYPE(char_array) == NPY_UNICODE) {
+        method = PyObject_GetAttr((PyObject *)&PyUnicode_Type, method_name);
+    } else {
+        PyErr_SetString(PyExc_TypeError, "string operation on non-string array");
+        goto err;
+    }
+    if (method == NULL) {
+        goto err;
+    }
+
+    /*
+     * The helpers pass `type` on to PyArray_SimpleNewFromDescr, so once one
+     * of them has been called the reference is no longer ours.  Clearing
+     * the pointer keeps the error path below from releasing it again.
+     */
+    if (args_seq == NULL ||
+        (PySequence_Check(args_seq) && PySequence_Size(args_seq) == 0)) {
+        result = _vec_string_no_args(char_array, type, method);
+        type = NULL;
+    } else if (PySequence_Check(args_seq)) {
+        result = _vec_string_with_args(char_array, type, method, args_seq);
+        type = NULL;
+    } else {
+        PyErr_SetString(PyExc_TypeError, "'args' must be a sequence of arguments");
+        goto err;
+    }
+    if (result == NULL) {
+        goto err;
+    }
+
+    Py_DECREF(char_array);
+    Py_DECREF(method);
+
+    return result;
+
+ err:
+    Py_XDECREF(char_array);
+    /* Still set only when no helper was reached; release the new
+       reference created by PyArray_DescrConverter. */
+    Py_XDECREF(type);
+    Py_XDECREF(method);
+
+    return NULL;
+}
#ifndef __NPY_PRIVATE_NO_SIGNAL
@@ -2397,7 +2612,7 @@ static struct PyMethodDef array_module_methods[] = {
{"set_numeric_ops",
(PyCFunction)array_set_ops_function,
METH_VARARGS|METH_KEYWORDS, NULL},
- {"set_datetimeparse_function",
+ {"set_datetimeparse_function",
(PyCFunction)array_set_datetimeparse_function,
METH_VARARGS|METH_KEYWORDS, NULL},
{"set_typeDict",
@@ -2475,6 +2690,9 @@ static struct PyMethodDef array_module_methods[] = {
{"compare_chararrays",
(PyCFunction)compare_chararrays,
METH_VARARGS | METH_KEYWORDS, NULL},
+ {"_vec_string",
+ (PyCFunction)_vec_string,
+ METH_VARARGS | METH_KEYWORDS, NULL},
{"test_interrupt",
(PyCFunction)test_interrupt,
METH_VARARGS, NULL},
@@ -2729,7 +2947,7 @@ PyMODINIT_FUNC initmultiarray(void) {
if (PyErr_Occurred()) {
goto err;
}
-
+
/*
* PyExc_Exception should catch all the standard errors that are
* now raised instead of the string exception "multiarray.error"
diff --git a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py
index f5053edb6..723f7b598 100644
--- a/numpy/core/tests/test_defchararray.py
+++ b/numpy/core/tests/test_defchararray.py
@@ -1,15 +1,111 @@
from numpy.testing import *
from numpy.core import *
import numpy as np
+import sys
+from numpy.core.multiarray import _vec_string
class TestBasic(TestCase):
- def test_construction(self):
- A = np.array([['abc', '123'],
- ['789', 'xyz']])
- A1 = A.view(np.chararray)
- A2 = np.chararray.__new__(np.chararray, A.shape, itemsize=A.itemsize,
- buffer=A)
- assert all(A1 == A2)
+ def test_from_object_array(self):
+ A = np.array([['abc', 2],
+ ['long ', '0123456789']], dtype='O')
+ B = np.char.array(A)
+ assert_equal(B.dtype.itemsize, 10)
+ assert_array_equal(B, [['abc', '2'], ['long', '0123456789']])
+
+ def test_from_object_array_unicode(self):
+ A = np.array([['abc', u'Sigma \u03a3'],
+ ['long ', '0123456789']], dtype='O')
+ self.failUnlessRaises(ValueError, np.char.array, (A,))
+ B = np.char.array(A, unicode=True)
+ assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize)
+ assert_array_equal(B, [['abc', u'Sigma \u03a3'], ['long', '0123456789']])
+
+ def test_from_string_array(self):
+ A = np.array([['abc', 'foo'],
+ ['long ', '0123456789']])
+ assert_equal(A.dtype.type, np.string_)
+ B = np.char.array(A)
+ assert_array_equal(B, A)
+ assert_equal(B.dtype, A.dtype)
+ assert_equal(B.shape, A.shape)
+ B[0,0] = 'changed'
+ assert B[0,0] != A[0,0]
+ C = np.char.asarray(A)
+ assert_array_equal(C, A)
+ assert_equal(C.dtype, A.dtype)
+ C[0,0] = 'changed again'
+ assert C[0,0] != B[0,0]
+ assert C[0,0] == A[0,0]
+
+ def test_from_unicode_array(self):
+ A = np.array([['abc', u'Sigma \u03a3'],
+ ['long ', '0123456789']])
+ assert_equal(A.dtype.type, np.unicode_)
+ B = np.char.array(A)
+ assert_array_equal(B, A)
+ assert_equal(B.dtype, A.dtype)
+ assert_equal(B.shape, A.shape)
+ B = np.char.array(A, unicode=True)
+ assert_array_equal(B, A)
+ assert_equal(B.dtype, A.dtype)
+ assert_equal(B.shape, A.shape)
+ def fail():
+ B = np.char.array(A, unicode=False)
+ self.failUnlessRaises(UnicodeEncodeError, fail)
+
+ def test_unicode_upconvert(self):
+ A = np.char.array(['abc'])
+ B = np.char.array([u'\u03a3'])
+ assert issubclass((A + B).dtype.type, np.unicode_)
+
+ def test_from_string(self):
+ A = np.char.array('abc')
+ assert_equal(len(A), 1)
+ assert_equal(len(A[0]), 3)
+ assert issubclass(A.dtype.type, np.string_)
+
+ def test_from_unicode(self):
+ A = np.char.array(u'\u03a3')
+ assert_equal(len(A), 1)
+ assert_equal(len(A[0]), 1)
+ assert_equal(A.itemsize, 4)
+ assert issubclass(A.dtype.type, np.unicode_)
+
+class TestVecString(TestCase):
+ def test_non_existent_method(self):
+ def fail():
+ _vec_string('a', np.string_, 'bogus')
+ self.failUnlessRaises(AttributeError, fail)
+
+ def test_non_string_array(self):
+ def fail():
+ _vec_string(1, np.string_, 'strip')
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_invalid_args_tuple(self):
+ def fail():
+ _vec_string(['a'], np.string_, 'strip', 1)
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_invalid_type_descr(self):
+ def fail():
+ _vec_string(['a'], 'BOGUS', 'strip')
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_invalid_function_args(self):
+ def fail():
+ _vec_string(['a'], np.string_, 'strip', (1,))
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_invalid_result_type(self):
+ def fail():
+ _vec_string(['a'], np.integer, 'strip')
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_broadcast_error(self):
+ def fail():
+ _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],))
+ self.failUnlessRaises(ValueError, fail)
class TestWhitespace(TestCase):
@@ -21,15 +117,394 @@ class TestWhitespace(TestCase):
def test1(self):
assert all(self.A == self.B)
-
+ assert all(self.A >= self.B)
+ assert all(self.A <= self.B)
+ assert all(negative(self.A > self.B))
+ assert all(negative(self.A < self.B))
+ assert all(negative(self.A != self.B))
class TestChar(TestCase):
def setUp(self):
self.A = np.array('abc1', dtype='c').view(np.chararray)
def test_it(self):
- assert self.A.shape == (4,)
- assert self.A.upper()[:2].tostring() == 'AB'
+ assert_equal(self.A.shape, (4,))
+ assert_equal(self.A.upper()[:2].tostring(), 'AB')
+
+class TestComparisons(TestCase):
+ def setUp(self):
+ self.A = np.array([['abc', '123'],
+ ['789', 'xyz']]).view(np.chararray)
+ self.B = np.array([['efg', '123 '],
+ ['051', 'tuv']]).view(np.chararray)
+
+ def test_not_equal(self):
+ assert_array_equal((self.A != self.B), [[True, False], [True, True]])
+
+ def test_equal(self):
+ assert_array_equal((self.A == self.B), [[False, True], [False, False]])
+
+ def test_greater_equal(self):
+ assert_array_equal((self.A >= self.B), [[False, True], [True, True]])
+
+ def test_less_equal(self):
+ assert_array_equal((self.A <= self.B), [[True, True], [False, False]])
+
+ def test_greater(self):
+ assert_array_equal((self.A > self.B), [[False, False], [True, True]])
+
+ def test_less(self):
+ assert_array_equal((self.A < self.B), [[True, False], [False, False]])
+
+class TestInformation(TestCase):
+ def setUp(self):
+ self.A = np.array([[' abc ', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray)
+ self.B = np.array([[u' \u03a3 ', u''],
+ [u'12345', u'MixedCase'],
+ [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray)
+
+ def test_len(self):
+ assert issubclass(np.char.str_len(self.A).dtype.type, np.integer)
+ assert_array_equal(np.char.str_len(self.A), [[5, 0], [5, 9], [12, 5]])
+ assert_array_equal(np.char.str_len(self.B), [[3, 0], [5, 9], [12, 5]])
+
+ def test_count(self):
+ assert issubclass(self.A.count('').dtype.type, np.integer)
+ assert_array_equal(self.A.count('a'), [[1, 0], [0, 1], [0, 0]])
+ assert_array_equal(self.A.count('123'), [[0, 0], [1, 0], [1, 0]])
+ # Python doesn't seem to like counting NULL characters
+ # assert_array_equal(self.A.count('\0'), [[0, 0], [0, 0], [1, 0]])
+ assert_array_equal(self.A.count('a', 0, 2), [[1, 0], [0, 0], [0, 0]])
+ assert_array_equal(self.B.count('a'), [[0, 0], [0, 1], [0, 0]])
+ assert_array_equal(self.B.count('123'), [[0, 0], [1, 0], [1, 0]])
+ # assert_array_equal(self.B.count('\0'), [[0, 0], [0, 0], [1, 0]])
+
+ def test_endswith(self):
+ assert issubclass(self.A.endswith('').dtype.type, np.bool_)
+ assert_array_equal(self.A.endswith(' '), [[1, 0], [0, 0], [1, 0]])
+ assert_array_equal(self.A.endswith('3', 0, 3), [[0, 0], [1, 0], [1, 0]])
+ def fail():
+ self.A.endswith('3', 'fdjk')
+ self.failUnlessRaises(TypeError, fail)
+
+ def test_find(self):
+ assert issubclass(self.A.find('a').dtype.type, np.integer)
+ assert_array_equal(self.A.find('a'), [[1, -1], [-1, 6], [-1, -1]])
+ assert_array_equal(self.A.find('3'), [[-1, -1], [2, -1], [2, -1]])
+ assert_array_equal(self.A.find('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]])
+ assert_array_equal(self.A.find(['1', 'P']), [[-1, -1], [0, -1], [0, 1]])
+
+ def test_index(self):
+ def fail():
+ self.A.index('a')
+ self.failUnlessRaises(ValueError, fail)
+ assert np.char.index('abcba', 'b') == 1
+ assert issubclass(np.char.index('abcba', 'b').dtype.type, np.integer)
+
+ def test_isalnum(self):
+ assert issubclass(self.A.isalnum().dtype.type, np.bool_)
+ assert_array_equal(self.A.isalnum(), [[False, False], [True, True], [False, True]])
+
+ def test_isalpha(self):
+ assert issubclass(self.A.isalpha().dtype.type, np.bool_)
+ assert_array_equal(self.A.isalpha(), [[False, False], [False, True], [False, True]])
+
+ def test_isdigit(self):
+ assert issubclass(self.A.isdigit().dtype.type, np.bool_)
+ assert_array_equal(self.A.isdigit(), [[False, False], [True, False], [False, False]])
+
+ def test_islower(self):
+ assert issubclass(self.A.islower().dtype.type, np.bool_)
+ assert_array_equal(self.A.islower(), [[True, False], [False, False], [False, False]])
+
+ def test_isspace(self):
+ assert issubclass(self.A.isspace().dtype.type, np.bool_)
+ assert_array_equal(self.A.isspace(), [[False, False], [False, False], [False, False]])
+
+ def test_istitle(self):
+ assert issubclass(self.A.istitle().dtype.type, np.bool_)
+ assert_array_equal(self.A.istitle(), [[False, False], [False, False], [False, False]])
+
+ def test_isupper(self):
+ assert issubclass(self.A.isupper().dtype.type, np.bool_)
+ assert_array_equal(self.A.isupper(), [[False, False], [False, False], [False, True]])
+
+ def test_rfind(self):
+ assert issubclass(self.A.rfind('a').dtype.type, np.integer)
+ assert_array_equal(self.A.rfind('a'), [[1, -1], [-1, 6], [-1, -1]])
+ assert_array_equal(self.A.rfind('3'), [[-1, -1], [2, -1], [6, -1]])
+ assert_array_equal(self.A.rfind('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]])
+ assert_array_equal(self.A.rfind(['1', 'P']), [[-1, -1], [0, -1], [0, 2]])
+
+ def test_rindex(self):
+ def fail():
+ self.A.rindex('a')
+ self.failUnlessRaises(ValueError, fail)
+ assert np.char.rindex('abcba', 'b') == 3
+ assert issubclass(np.char.rindex('abcba', 'b').dtype.type, np.integer)
+
+ def test_startswith(self):
+ assert issubclass(self.A.startswith('').dtype.type, np.bool_)
+ assert_array_equal(self.A.startswith(' '), [[1, 0], [0, 0], [0, 0]])
+ assert_array_equal(self.A.startswith('1', 0, 3), [[0, 0], [1, 0], [1, 0]])
+ def fail():
+ self.A.startswith('3', 'fdjk')
+ self.failUnlessRaises(TypeError, fail)
+
+
+class TestMethods(TestCase):
+ def setUp(self):
+ self.A = np.array([[' abc ', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray)
+ self.B = np.array([[u' \u03a3 ', u''],
+ [u'12345', u'MixedCase'],
+ [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray)
+
+ def test_capitalize(self):
+ assert issubclass(self.A.capitalize().dtype.type, np.string_)
+ assert_array_equal(self.A.capitalize(), [
+ [' abc ', ''],
+ ['12345', 'Mixedcase'],
+ ['123 \t 345 \0 ', 'Upper']])
+ assert issubclass(self.B.capitalize().dtype.type, np.unicode_)
+ assert_array_equal(self.B.capitalize(), [
+ [u' \u03c3 ', ''],
+ ['12345', 'Mixedcase'],
+ ['123 \t 345 \0 ', 'Upper']])
+
+ def test_center(self):
+ assert issubclass(self.A.center(10).dtype.type, np.string_)
+ widths = np.array([[10, 20]])
+ C = self.A.center([10, 20])
+ assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
+ C = self.A.center(20, '#')
+ assert np.all(C.startswith('#'))
+ assert np.all(C.endswith('#'))
+ C = np.char.center('FOO', [[10, 20], [15, 8]])
+ assert issubclass(C.dtype.type, np.string_)
+ assert_array_equal(C, [
+ [' FOO ', ' FOO '],
+ [' FOO ', ' FOO ']])
+
+ def test_decode(self):
+ A = np.char.array(['736563726574206d657373616765'])
+ assert A.decode('hex_codec')[0] == 'secret message'
+
+ def test_encode(self):
+ B = self.B.encode('unicode_escape')
+ print B[0][0]
+ assert B[0][0] == r' \u03a3 '
+
+ def test_expandtabs(self):
+ T = self.A.expandtabs()
+ print T
+ assert T[2][0] == '123 345'
+
+ def test_join(self):
+ A = np.char.join([',', '#'], self.A)
+ assert issubclass(A.dtype.type, np.string_)
+ assert_array_equal(np.char.join([',', '#'], self.A), [
+ [' ,a,b,c, ', ''],
+ ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'],
+ ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']])
+
+ def test_ljust(self):
+ assert issubclass(self.A.ljust(10).dtype.type, np.string_)
+ widths = np.array([[10, 20]])
+ C = self.A.ljust([10, 20])
+ assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
+ C = self.A.ljust(20, '#')
+ assert_array_equal(C.startswith('#'), [
+ [False, True], [False, False], [False, False]])
+ assert np.all(C.endswith('#'))
+ C = np.char.ljust('FOO', [[10, 20], [15, 8]])
+ assert issubclass(C.dtype.type, np.string_)
+ assert_array_equal(C, [
+ ['FOO ', 'FOO '],
+ ['FOO ', 'FOO ']])
+
+ def test_lower(self):
+ assert issubclass(self.A.lower().dtype.type, np.string_)
+ assert_array_equal(self.A.lower(), [
+ [' abc ', ''],
+ ['12345', 'mixedcase'],
+ ['123 \t 345 \0 ', 'upper']])
+ assert issubclass(self.B.lower().dtype.type, np.unicode_)
+ assert_array_equal(self.B.lower(), [
+ [u' \u03c3 ', u''],
+ [u'12345', u'mixedcase'],
+ [u'123 \t 345 \0 ', u'upper']])
+
+ def test_lstrip(self):
+ assert issubclass(self.A.lstrip().dtype.type, np.string_)
+ assert_array_equal(self.A.lstrip(), [
+ ['abc ', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345 \0 ', 'UPPER']])
+ assert_array_equal(self.A.lstrip(['1', 'M']), [
+ [' abc', ''],
+ ['2345', 'ixedCase'],
+ ['23 \t 345 \x00', 'UPPER']])
+ assert issubclass(self.B.lstrip().dtype.type, np.unicode_)
+ assert_array_equal(self.B.lstrip(), [
+ [u'\u03a3 ', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345 \0 ', 'UPPER']])
+
+ def test_partition(self):
+ if sys.version_info >= (2, 5):
+ P = self.A.partition(['3', 'M'])
+ assert issubclass(P.dtype.type, np.string_)
+ assert_array_equal(P, [
+ [(' abc ', '', ''), ('', '', '')],
+ [('12', '3', '45'), ('', 'M', 'ixedCase')],
+ [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]])
+
+ def test_replace(self):
+ R = self.A.replace(['3', 'a'], ['##########', '@'])
+ assert issubclass(R.dtype.type, np.string_)
+ assert_array_equal(R, [
+ [' abc ', ''],
+ ['12##########45', 'MixedC@se'],
+ ['12########## \t ##########45 \x00', 'UPPER']])
+ R = self.A.replace('a', u'\u03a3')
+ assert issubclass(R.dtype.type, np.unicode_)
+ assert_array_equal(R, [
+ [u' \u03a3bc ', ''],
+ ['12345', u'MixedC\u03a3se'],
+ ['123 \t 345 \x00', 'UPPER']])
+
+ def test_rjust(self):
+ assert issubclass(self.A.rjust(10).dtype.type, np.string_)
+ widths = np.array([[10, 20]])
+ C = self.A.rjust([10, 20])
+ assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]])
+ C = self.A.rjust(20, '#')
+ assert np.all(C.startswith('#'))
+ assert_array_equal(C.endswith('#'), [[False, True], [False, False], [False, False]])
+ C = np.char.rjust('FOO', [[10, 20], [15, 8]])
+ assert issubclass(C.dtype.type, np.string_)
+ assert_array_equal(C, [
+ [' FOO', ' FOO'],
+ [' FOO', ' FOO']])
+
+ def test_rpartition(self):
+ if sys.version_info >= (2, 5):
+ P = self.A.rpartition(['3', 'M'])
+ assert issubclass(P.dtype.type, np.string_)
+ assert_array_equal(P, [
+ [('', '', ' abc '), ('', '', '')],
+ [('12', '3', '45'), ('', 'M', 'ixedCase')],
+ [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]])
+
+ def test_rsplit(self):
+ A = self.A.rsplit('3')
+ assert issubclass(A.dtype.type, np.object_)
+ assert_equal(A.tolist(), [
+ [[' abc '], ['']],
+ [['12', '45'], ['MixedCase']],
+ [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+
+ def test_rstrip(self):
+ assert issubclass(self.A.rstrip().dtype.type, np.string_)
+ assert_array_equal(self.A.rstrip(), [
+ [' abc', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345', 'UPPER']])
+ assert_array_equal(self.A.rstrip(['5', 'ER']), [
+ [' abc ', ''],
+ ['1234', 'MixedCase'],
+ ['123 \t 345 \x00', 'UPP']])
+ assert issubclass(self.B.rstrip().dtype.type, np.unicode_)
+ assert_array_equal(self.B.rstrip(), [
+ [u' \u03a3', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345', 'UPPER']])
+
+ def test_strip(self):
+ assert issubclass(self.A.strip().dtype.type, np.string_)
+ assert_array_equal(self.A.strip(), [
+ ['abc', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345', 'UPPER']])
+ assert_array_equal(self.A.strip(['15', 'EReM']), [
+ [' abc ', ''],
+ ['234', 'ixedCas'],
+ ['23 \t 345 \x00', 'UPP']])
+ assert issubclass(self.B.strip().dtype.type, np.unicode_)
+ assert_array_equal(self.B.strip(), [
+ [u'\u03a3', ''],
+ ['12345', 'MixedCase'],
+ ['123 \t 345', 'UPPER']])
+
+ def test_split(self):
+ A = self.A.split('3')
+ assert issubclass(A.dtype.type, np.object_)
+ assert_equal(A.tolist(), [
+ [[' abc '], ['']],
+ [['12', '45'], ['MixedCase']],
+ [['12', ' \t ', '45 \x00 '], ['UPPER']]])
+
+ def test_splitlines(self):
+ A = np.char.array(['abc\nfds\nwer']).splitlines()
+ assert issubclass(A.dtype.type, np.object_)
+ assert A.shape == (1,)
+ assert len(A[0]) == 3
+
+ def test_swapcase(self):
+ assert issubclass(self.A.swapcase().dtype.type, np.string_)
+ assert_array_equal(self.A.swapcase(), [
+ [' ABC ', ''],
+ ['12345', 'mIXEDcASE'],
+ ['123 \t 345 \0 ', 'upper']])
+ assert issubclass(self.B.swapcase().dtype.type, np.unicode_)
+ assert_array_equal(self.B.swapcase(), [
+ [u' \u03c3 ', u''],
+ [u'12345', u'mIXEDcASE'],
+ [u'123 \t 345 \0 ', u'upper']])
+
+ def test_title(self):
+ assert issubclass(self.A.title().dtype.type, np.string_)
+ assert_array_equal(self.A.title(), [
+ [' Abc ', ''],
+ ['12345', 'Mixedcase'],
+ ['123 \t 345 \0 ', 'Upper']])
+ assert issubclass(self.B.title().dtype.type, np.unicode_)
+ assert_array_equal(self.B.title(), [
+ [u' \u03a3 ', u''],
+ [u'12345', u'Mixedcase'],
+ [u'123 \t 345 \0 ', u'Upper']])
+
+ def test_upper(self):
+ assert issubclass(self.A.upper().dtype.type, np.string_)
+ assert_array_equal(self.A.upper(), [
+ [' ABC ', ''],
+ ['12345', 'MIXEDCASE'],
+ ['123 \t 345 \0 ', 'UPPER']])
+ assert issubclass(self.B.upper().dtype.type, np.unicode_)
+ assert_array_equal(self.B.upper(), [
+ [u' \u03a3 ', u''],
+ [u'12345', u'MIXEDCASE'],
+ [u'123 \t 345 \0 ', u'UPPER']])
+
+ def test_isnumeric(self):
+ def fail():
+ self.A.isnumeric()
+ self.failUnlessRaises(TypeError, fail)
+ assert issubclass(self.B.isnumeric().dtype.type, np.bool_)
+ assert_array_equal(self.B.isnumeric(), [
+ [False, False], [True, False], [False, False]])
+
+ def test_isdecimal(self):
+ def fail():
+ self.A.isdecimal()
+ self.failUnlessRaises(TypeError, fail)
+ assert issubclass(self.B.isdecimal().dtype.type, np.bool_)
+ assert_array_equal(self.B.isdecimal(), [
+ [False, False], [True, False], [False, False]])
class TestOperations(TestCase):
@@ -42,20 +517,21 @@ class TestOperations(TestCase):
def test_add(self):
AB = np.array([['abcefg', '123456'],
['789051', 'xyztuv']]).view(np.chararray)
- assert all(AB == (self.A + self.B))
+ assert_array_equal(AB, (self.A + self.B))
+ assert len((self.A + self.B)[0][0]) == 6
def test_radd(self):
QA = np.array([['qabc', 'q123'],
['q789', 'qxyz']]).view(np.chararray)
- assert all(QA == ('q' + self.A))
+ assert_array_equal(QA, ('q' + self.A))
def test_mul(self):
A = self.A
-# for r in (2,3,5,7,197):
-# Ar = np.array([[A[0,0]*r, A[0,1]*r],
-# [A[1,0]*r, A[1,1]*r]]).view(np.chararray)
-#
-# assert all(Ar == (self.A * r))
+ for r in (2,3,5,7,197):
+ Ar = np.array([[A[0,0]*r, A[0,1]*r],
+ [A[1,0]*r, A[1,1]*r]]).view(np.chararray)
+
+ assert_array_equal(Ar, (self.A * r))
for ob in [object(), 'qrs']:
try:
@@ -67,11 +543,10 @@ class TestOperations(TestCase):
def test_rmul(self):
A = self.A
-# for r in (2,3,5,7,197):
-# Ar = np.array([[A[0,0]*r, A[0,1]*r],
-# [A[1,0]*r, A[1,1]*r]]).view(np.chararray)
-#
-# assert all(Ar == (r * self.A))
+ for r in (2,3,5,7,197):
+ Ar = np.array([[A[0,0]*r, A[0,1]*r],
+ [A[1,0]*r, A[1,1]*r]]).view(np.chararray)
+ assert_array_equal(Ar, (r * self.A))
for ob in [object(), 'qrs']:
try:
@@ -82,7 +557,19 @@ class TestOperations(TestCase):
self.fail("chararray can only be multiplied by integers")
def test_mod(self):
- pass
+ # From Alan McIntyre's bug #856
+ F = np.array([['%d', '%f'],['%s','%r']]).view(np.chararray)
+ C = np.array([[3,7],[19,1]])
+ FC = np.array([['3', '7.000000'],
+ ['19', '1']]).view(np.chararray)
+ assert_array_equal(FC, F % C)
+
+ A = np.array([['%.3f','%d'],['%s','%r']]).view(np.chararray)
+ A1 = np.array([['1.000','1'],['1','1']]).view(np.chararray)
+ assert_array_equal(A1, (A % 1))
+
+ A2 = np.array([['1.000','2'],['3','4']]).view(np.chararray)
+ assert_array_equal(A2, (A % [[1,2],[3,4]]))
def test_rmod(self):
assert ("%s" % self.A) == str(self.A)
@@ -98,5 +585,6 @@ class TestOperations(TestCase):
"non-string objects")
+
if __name__ == "__main__":
run_module_suite()
diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py
index f638558d1..01f9ea58e 100644
--- a/numpy/core/tests/test_regression.py
+++ b/numpy/core/tests/test_regression.py
@@ -1118,5 +1118,37 @@ class TestRegression(TestCase):
i = np.lexsort((a[::-1], b))
assert_equal(i, np.arange(100, dtype=np.int))
+ def test_object_array_to_fixed_string(self):
+ """Ticket #1235."""
+ a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_)
+ b = np.array(a, dtype=(np.string_, 8))
+ assert_equal(a, b)
+ c = np.array(a, dtype=(np.string_, 5))
+ assert_equal(c, np.array(['abcde', 'ijklm']))
+ d = np.array(a, dtype=(np.string_, 12))
+ assert_equal(a, d)
+ e = np.empty((2, ), dtype=(np.string_, 8))
+ e[:] = a[:]
+ assert_equal(a, e)
+
+ def test_unicode_to_string_cast(self):
+ """Ticket #1240."""
+ a = np.array([[u'abc', u'\u03a3'], [u'asdf', u'erw']], dtype='U')
+ def fail():
+ b = np.array(a, 'S4')
+ self.failUnlessRaises(UnicodeEncodeError, fail)
+
+ def test_mixed_string_unicode_array_creation(self):
+ a = np.array(['1234', u'123'])
+ assert a.itemsize == 16
+ a = np.array([u'123', '1234'])
+ assert a.itemsize == 16
+ a = np.array(['1234', u'123', '12345'])
+ assert a.itemsize == 20
+ a = np.array([u'123', '1234', u'12345'])
+ assert a.itemsize == 20
+ a = np.array([u'123', '1234', u'1234'])
+ assert a.itemsize == 16
+
if __name__ == "__main__":
run_module_suite()