diff options
-rw-r--r-- | doc/source/reference/arrays.classes.rst | 11 | ||||
-rw-r--r-- | doc/source/reference/routines.array-creation.rst | 1 | ||||
-rw-r--r-- | doc/source/reference/routines.char.rst | 88 | ||||
-rw-r--r-- | doc/source/reference/routines.rst | 1 | ||||
-rw-r--r-- | numpy/core/defchararray.py | 2802 | ||||
-rw-r--r-- | numpy/core/src/multiarray/arraytypes.c.src | 20 | ||||
-rw-r--r-- | numpy/core/src/multiarray/common.c | 6 | ||||
-rw-r--r-- | numpy/core/src/multiarray/multiarraymodule.c | 228 | ||||
-rw-r--r-- | numpy/core/tests/test_defchararray.py | 534 | ||||
-rw-r--r-- | numpy/core/tests/test_regression.py | 32 |
10 files changed, 3141 insertions, 582 deletions
diff --git a/doc/source/reference/arrays.classes.rst b/doc/source/reference/arrays.classes.rst index 9e94d5c4d..6d5e7bde0 100644 --- a/doc/source/reference/arrays.classes.rst +++ b/doc/source/reference/arrays.classes.rst @@ -228,11 +228,12 @@ Character arrays (:mod:`numpy.char`) single: character arrays .. note:: - The chararray module exists for backwards compatibility with Numarray, - it is not recommended for new development. If one needs arrays of - strings, use arrays of `dtype` object. + The chararray module exists for backwards compatibility with + Numarray, it is not recommended for new development. If one needs + arrays of strings, use arrays of `dtype` `object_`, `string_` or + `unicode_`. -These are enhanced arrays of either :class:`string` type or +These are enhanced arrays of either :class:`string_` type or :class:`unicode_` type. These arrays inherit from the :class:`ndarray`, but specially-define the operations ``+``, ``*``, and ``%`` on a (broadcasting) element-by-element basis. These @@ -244,7 +245,7 @@ to create a chararray is to use :meth:`self.view(chararray) <ndarray.view>` where *self* is an ndarray of string or unicode data-type. However, a chararray can also be created using the :meth:`numpy.chararray` constructor, or via the -:func:`numpy.char.array` function: +:func:`numpy.char.array <core.defchararray.array>` function: .. 
autosummary:: :toctree: generated/ diff --git a/doc/source/reference/routines.array-creation.rst b/doc/source/reference/routines.array-creation.rst index 4eabb8ec7..ef9c0041d 100644 --- a/doc/source/reference/routines.array-creation.rst +++ b/doc/source/reference/routines.array-creation.rst @@ -66,6 +66,7 @@ Creating character arrays (:mod:`numpy.char`) :toctree: generated/ core.defchararray.array + core.defchararray.asarray Numerical ranges ---------------- diff --git a/doc/source/reference/routines.char.rst b/doc/source/reference/routines.char.rst new file mode 100644 index 000000000..2e995a772 --- /dev/null +++ b/doc/source/reference/routines.char.rst @@ -0,0 +1,88 @@ +String operations +***************** + +.. currentmodule:: numpy.core.defchararray + +This module provides a set of vectorized string operations for arrays +of type `numpy.string_` or `numpy.unicode_`. All of them are based on +the string methods in the Python standard library. + +String operations +----------------- + +.. autosummary:: + :toctree: generated/ + + add + multiply + mod + capitalize + center + decode + encode + join + ljust + lower + lstrip + partition + replace + rjust + rpartition + rsplit + rstrip + split + splitlines + strip + swapcase + title + translate + upper + zfill + +Comparison +---------- + +Unlike the standard numpy comparison operators, the ones in the `char` +module strip trailing whitespace characters before performing the +comparison. + +.. autosummary:: + :toctree: generated/ + + equal + not_equal + greater_equal + less_equal + greater + less + +String information +------------------ + +.. autosummary:: + :toctree: generated/ + + count + len + find + index + isalpha + isdecimal + isdigit + islower + isnumeric + isspace + istitle + isupper + rfind + rindex + startswith + +Convenience class +----------------- + +.. 
autosummary:: + :toctree: generated/ + + chararray + diff --git a/doc/source/reference/routines.rst b/doc/source/reference/routines.rst index eb2e9fc4e..90fe1c189 100644 --- a/doc/source/reference/routines.rst +++ b/doc/source/reference/routines.rst @@ -34,3 +34,4 @@ Routines routines.numarray routines.oldnumeric routines.ctypeslib + routines.char diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py index 580bde59a..b59f27065 100644 --- a/numpy/core/defchararray.py +++ b/numpy/core/defchararray.py @@ -1,75 +1,1672 @@ """ -Module for character arrays. +This module contains a set of functions for vectorized string +operations and methods. .. note:: - The chararray module exists for backwards compatibility with Numarray, - it is not recommended for new development. If one needs arrays of - strings, use arrays of `dtype` object. + The chararray module exists for backwards compatibility with + Numarray, it is not recommended for new development. If one needs + arrays of strings, use arrays of `dtype` `object_`, `string_` or + `unicode_`. -The preferred alias for `defchararray` is `numpy.char`. +Methods will only be available if the corresponding str method is +available in your version of Python. +The preferred alias for `defchararray` is `numpy.char`. """ + import sys -from numerictypes import string_, unicode_, integer, object_ -from numeric import ndarray, broadcast, empty, compare_chararrays +from numerictypes import string_, unicode_, integer, object_, bool_, character +from numeric import ndarray, compare_chararrays from numeric import array as narray +from numpy.core.multiarray import _vec_string +import numpy __all__ = ['chararray'] _globalvar = 0 _unicode = unicode +_len = len + +def _use_unicode(*args): + """ + Helper function for determining the output type of some string + operations. + + For an operation on two ndarrays, if at least one is unicode, the + result should be unicode. 
+ """ + for x in args: + if (isinstance(x, _unicode) + or issubclass(numpy.asarray(x).dtype.type, unicode_)): + return unicode_ + return string_ + +def _to_string_or_unicode_array(result): + """ + Helper function to cast a result back into a string or unicode array + if an object array must be used as an intermediary. + """ + return numpy.asarray(result.tolist()) + +def _clean_args(*args): + """ + Helper function for delegating arguments to Python string + functions. + + Many of the Python string operations that have optional arguments + do not use 'None' to indicate a default value. In these cases, + we need to remove all `None` arguments, and those following them. + """ + newargs = [] + for chk in args: + if chk is None: + break + newargs.append(chk) + return newargs + +def _get_num_chars(a): + """ + Helper function that returns the number of characters per field in + a string or unicode array. This is to abstract out the fact that + for a unicode array this is itemsize / 4. + """ + if issubclass(a.dtype.type, unicode_): + return a.itemsize / 4 + return a.itemsize + + +def equal(x1, x2): + """ + Return (x1 == x2) element-wise. + + Unlike `numpy.equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. + + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + not_equal, greater_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '==', True) + +def not_equal(x1, x2): + """ + Return (x1 != x2) element-wise. + + Unlike `numpy.not_equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. 
+ + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. + + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + equal, greater_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '!=', True) + +def greater_equal(x1, x2): + """ + Return (x1 >= x2) element-wise. + + Unlike `numpy.greater_equal`, this comparison is performed by + first stripping whitespace characters from the end of the string. + This behavior is provided for backward-compatibility with + numarray. + + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. + + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + equal, not_equal, less_equal, greater, less + """ + return compare_chararrays(x1, x2, '>=', True) + +def less_equal(x1, x2): + """ + Return (x1 <= x2) element-wise. + + Unlike `numpy.less_equal`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. + + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + equal, not_equal, greater_equal, greater, less + """ + return compare_chararrays(x1, x2, '<=', True) + +def greater(x1, x2): + """ + Return (x1 > x2) element-wise. + + Unlike `numpy.greater`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. 
+ + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + equal, not_equal, greater_equal, less_equal, less + """ + return compare_chararrays(x1, x2, '>', True) + +def less(x1, x2): + """ + Return (x1 < x2) element-wise. + + Unlike `numpy.less`, this comparison is performed by first + stripping whitespace characters from the end of the string. This + behavior is provided for backward-compatibility with numarray. + + Parameters + ---------- + x1, x2 : array_like of string_ or unicode_ + Input arrays of the same shape. + + Returns + ------- + out : {ndarray, bool} + Output array of bools, or a single bool if x1 and x2 are scalars. + + See Also + -------- + equal, not_equal, greater_equal, less_equal, greater + """ + return compare_chararrays(x1, x2, '<', True) + +def str_len(a): + """ + Return len(a) element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of integers + + See also + -------- + __builtin__.len + """ + return _vec_string(a, integer, '__len__') + +def add(x1, x2): + """ + Return (x1 + x2), that is string concatenation, element-wise for a + pair of array_likes of string_ or unicode_. + + Parameters + ---------- + x1 : array_like of string_ or unicode_ + x2 : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of string_ or unicode_, depending on input types + """ + arr1 = numpy.asarray(x1) + arr2 = numpy.asarray(x2) + out_size = _get_num_chars(arr1) + _get_num_chars(arr2) + dtype = _use_unicode(arr1, arr2) + return _vec_string(arr1, (dtype, out_size), '__add__', (arr2,)) + +def multiply(a, i): + """ + Return (a * i), that is string multiple concatenation, + element-wise. + + Values in `i` of less than 0 are treated as 0 (which yields an + empty string). 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + i : array_like of integers + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input types + """ + a_arr = numpy.asarray(a) + i_arr = numpy.asarray(i) + if not issubclass(i_arr.dtype.type, integer): + raise ValueError, "Can only multiply by integers" + out_size = _get_num_chars(a_arr) * max(long(i_arr.max()), 0) + return _vec_string( + a_arr, (a_arr.dtype.type, out_size), '__mul__', (i_arr,)) + +def mod(a, values): + """ + Return (a % i), that is pre-Python 2.6 string formatting + (interpolation), element-wise for a pair of array_likes of string_ + or unicode_. + + Parameters + ---------- + a : array_like of string_ or unicode_ + values : array_like of values + These values will be element-wise interpolated into the string. + + Returns + ------- + out : ndarray + Output array of string_ or unicode_, depending on input types + + See also + -------- + str.__mod__ + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, '__mod__', (values,))) + +def capitalize(a): + """ + Return a copy of `a` with only the first character of each element + capitalized. -# special sub-class for character arrays (string_ and unicode_) -# This adds + and * operations and methods of str and unicode types -# which operate on an element-by-element basis + Calls `str.capitalize` element-wise. + + For 8-bit strings, this method is locale-dependent. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of string_ or unicode_, depending on input types + + See also + -------- + str.capitalize + + Examples + -------- + >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4'); c + array(['a1b2', '1b2a', 'b2a1', '2a1b'], + dtype='|S4') + >>> np.char.capitalize(c) + array(['A1b2', '1b2a', 'B2a1', '2a1b'], + dtype='|S4') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'capitalize') + +if sys.version_info >= (2, 4): + def center(a, width, fillchar=' '): + """ + Return a copy of `a` with its elements centered in a string of + length `width`. + + Calls `str.center` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + fillchar : str or unicode, optional + The padding character to use (default is space). + + Returns + ------- + out : ndarray + Output array of string_ or unicode_, depending on input types + + See also + -------- + str.center + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'center', (width_arr, fillchar)) +else: + def center(a, width): + """ + Return an array with the elements of `a` centered in a string + of length width. + + Calls `str.center` element-wise. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + + Returns + ------- + out : ndarray, str or unicode + Output array of string_ or unicode_, depending on input types + + See also + -------- + str.center + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'center', (width_arr,)) + +def count(a, sub, start=0, end=None): + """ + Returns an array with the number of non-overlapping occurrences of + substring `sub` in the range [`start`, `end`]. + + Calls `str.count` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + sub : str or unicode + The substring to search for + start, end : int, optional + Optional arguments `start` and `end` are interpreted as slice + notation to specify the range in which to count. + + Returns + ------- + out : ndarray + Output array of integers. + + See also + -------- + str.count + + Examples + -------- + >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c + array(['aAaAaA', ' aA', 'abBABba'], + dtype='|S7') + >>> np.char.count(c, 'A') + array([3, 1, 1]) + >>> np.char.count(c, 'aA') + array([3, 1, 0]) + >>> np.char.count(c, 'A', start=1, end=4) + array([2, 1, 1]) + >>> np.char.count(c, 'A', start=1, end=3) + array([1, 0, 0]) + """ + return _vec_string(a, integer, 'count', [sub, start] + _clean_args(end)) + +def decode(a, encoding=None, errors=None): + """ + Calls `str.decode` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + encoding : str, optional + The name of an encoding + errors : str, optional + Specifies how to handle encoding errors + + Returns + ------- + out : ndarray + + Notes + ----- + The type of the result will depend on the encoding specified. 
+ + See also + -------- + str.decode + + Examples + -------- + >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c + array(['aAaAaA', ' aA', 'abBABba'], + dtype='|S7') + >>> np.char.encode(c, encoding='cp037') + array(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@', + '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'], + dtype='|S7') + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'decode', _clean_args(encoding, errors))) + +def encode(a, encoding=None, errors=None): + """ + Calls `str.encode` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + encoding : str, optional + The name of an encoding + errors : str, optional + Specifies how to handle encoding errors + + Returns + ------- + out : ndarray + + Notes + ----- + The type of the result will depend on the encoding specified. + + See also + -------- + str.encode + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'encode', _clean_args(encoding, errors))) + +def endswith(a, suffix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `a` ends with `suffix`, otherwise `False`. + + Calls `str.endswith` element-wise. + + Parameters + ---------- + a : array_like of string or unicode + suffix : str + start, end : int, optional + With optional `start`, test beginning at that position. With + optional `end`, stop comparing at that position. + + Returns + ------- + out : ndarray + Outputs an array of booleans. 
+ + See also + -------- + str.endswith + + Examples + -------- + >>> s = np.array(['foo', 'bar']) + >>> s[0] = 'foo' + >>> s[1] = 'bar' + >>> s + array(['foo', 'bar'], + dtype='|S3') + >>> np.char.endswith(s, 'ar') + array([False, True], dtype=bool) + >>> np.char.endswith(s, 'a', start=1, end=2) + array([False, True], dtype=bool) + """ + return _vec_string( + a, bool_, 'endswith', [suffix, start] + _clean_args(end)) + +def expandtabs(a, tabsize=8): + """ + Return a copy of each string element where all tab characters are + replaced by one or more spaces. + + Calls `str.expandtabs` element-wise. + + Return a copy of each string element where all tab characters are + replaced by one or more spaces, depending on the current column + and the given `tabsize`. The column number is reset to zero after + each newline occurring in the string. If `tabsize` is not given, a + tab size of 8 characters is assumed. This doesn't understand other + non-printing characters or escape sequences. + + Parameters + ---------- + a : array_like of string or unicode + tabsize : int, optional + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.expandtabs + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'expandtabs', (tabsize,))) + +def find(a, sub, start=0, end=None): + """ + For each element, return the lowest index in the string where + substring `sub` is found. + + Calls `str.find` element-wise. + + For each element, return the lowest index in the string where + substring `sub` is found, such that `sub` is contained in the + range [`start`, `end`]. + + Parameters + ---------- + a : array_like of string_ or unicode_ + sub : str or unicode + start, end : int, optional + Optional arguments `start` and `end` are interpreted as in + slice notation. + + Returns + ------- + out : {ndarray, integer} + Output array of integers. Returns -1 if `sub` is not found. 
+ + See also + -------- + str.find + """ + return _vec_string( + a, integer, 'find', [sub, start] + _clean_args(end)) + +if sys.version_info >= (2.6): + def format(a, *args, **kwargs): + # _vec_string doesn't support kwargs at present + raise NotImplementedError + +def index(a, sub, start=0, end=None): + """ + Like `find`, but raises `ValueError` when the substring is not found. + + Calls `str.index` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + sub : str or unicode + start, end : int, optional + + Returns + ------- + out : ndarray + Output array of integers. + + See also + -------- + find, str.index + """ + return _vec_string( + a, integer, 'index', [sub, start] + _clean_args(end)) + +def isalnum(a): + """ + Returns true for each element if all characters in the string are + alphanumeric and there is at least one character, false otherwise. + + Calls `str.isalnum` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.isalnum + """ + return _vec_string(a, bool_, 'isalnum') + +def isalpha(a): + """ + Returns true for each element if all characters in the string are + alphabetic and there is at least one character, false otherwise. + + Calls `str.isalpha` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.isalpha + """ + return _vec_string(a, bool_, 'isalpha') + +def isdigit(a): + """ + Returns true for each element if all characters in the string are + digits and there is at least one character, false otherwise. + + Calls `str.isdigit` element-wise. + + For 8-bit strings, this method is locale-dependent. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.isdigit + """ + return _vec_string(a, bool_, 'isdigit') + +def islower(a): + """ + Returns true for each element if all cased characters in the + string are lowercase and there is at least one cased character, + false otherwise. + + Calls `str.islower` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.islower + """ + return _vec_string(a, bool_, 'islower') + +def isspace(a): + """ + Returns true for each element if there are only whitespace + characters in the string and there is at least one character, + false otherwise. + + Calls `str.isspace` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.isspace + """ + return _vec_string(a, bool_, 'isspace') + +def istitle(a): + """ + Returns true for each element if the element is a titlecased + string and there is at least one character, false otherwise. + + Call `str.istitle` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.istitle + """ + return _vec_string(a, bool_, 'istitle') + +def isupper(a): + """ + Returns true for each element if all cased characters in the + string are uppercase and there is at least one character, false + otherwise. + + Call `str.isupper` element-wise. + + For 8-bit strings, this method is locale-dependent. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of booleans + + See also + -------- + str.isupper + """ + return _vec_string(a, bool_, 'isupper') + +def join(sep, seq): + """ + Return a string which is the concatenation of the strings in the + sequence `seq`. + + Calls `str.join` element-wise. + + Parameters + ---------- + sep : array_like of string_ or unicode_ + seq : array_like of string_ or unicode_ + + Returns + ------- + out : ndarray + Output array of string_ or unicode_, depending on input types + + See also + -------- + str.join + """ + return _to_string_or_unicode_array( + _vec_string(sep, object_, 'join', (seq,))) + +if sys.version_info >= (2, 4): + def ljust(a, width, fillchar=' '): + """ + Return an array with the elements of `a` left-justified in a + string of length `width`. + + Calls `str.ljust` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + fillchar : str or unicode, optional + The character to use for padding + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.ljust + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr, fillchar)) +else: + def ljust(a, width): + """ + Return an array with the elements of `a` left-justified in a + string of length `width`. + + Calls `str.ljust` element-wise. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.ljust + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'ljust', (width_arr,)) + +def lower(a): + """ + Return an array with the elements of `a` converted to lowercase. + + Call `str.lower` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array-like of str or unicode + + Returns + ------- + out : ndarray, str or unicode + Output array of str or unicode, depending on input type + + See also + -------- + str.lower + + Examples + -------- + >>> c = np.array(['A1B C', '1BCA', 'BCA1']); c + array(['A1B C', '1BCA', 'BCA1'], + dtype='|S5') + >>> np.char.lower(c) + array(['a1b c', '1bca', 'bca1'], + dtype='|S5') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'lower') + +def lstrip(a, chars=None): + """ + For each element in `a`, return a copy with the leading characters + removed. + + Calls `str.lstrip` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + chars : str or unicode, optional + The `chars` argument is a string specifying the set of + characters to be removed. If omitted or None, the `chars` + argument defaults to removing whitespace. The `chars` argument + is not a prefix; rather, all combinations of its values are + stripped. 
+ + Returns + ------- + out : ndarray, str or unicode + Output array of str or unicode, depending on input type + + See also + -------- + str.lstrip + + Examples + -------- + >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c + array(['aAaAaA', ' aA', 'abBABba'], + dtype='|S7') + >>> np.char.lstrip(c, 'a') # 'a' unstripped from c[1] because whitespace leading + array(['AaAaA', ' aA', 'bBABba'], + dtype='|S6') + >>> np.char.lstrip(c, 'A') # leaves c unchanged + array(['aAaAaA', ' aA', 'abBABba'], + dtype='|S7') + >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, '')).all() + True + >>> (np.char.lstrip(c, ' ') == np.char.lstrip(c, None)).all() + True + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'lstrip', (chars,)) + +if sys.version_info >= (2, 5): + def partition(a, sep): + """ + Partition each element in `a` around `sep`. + + Calls `str.partition` element-wise. + + For each element in `a`, split the element as the first + occurrence of `sep`, and return 3 strings containing the part + before the separator, the separator itself, and the part after + the separator. If the separator is not found, return 3 strings + containing the string itself, followed by two empty strings. + + Parameters + ---------- + a : array-like of str or unicode + sep : str or unicode + + Returns + ------- + out : ndarray + Output array of string or unicode, depending on input + type. The output array will have an extra dimension with + 3 elements per input element. + + See also + -------- + str.partition + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'partition', (sep,))) + +def replace(a, old, new, count=None): + """ + For each element in `a`, return a copy of the string with all + occurrences of substring `old` replaced by `new`. + + Calls `str.replace` element-wise. 
+ + Parameters + ---------- + a : array-like of str or unicode + old, new : str or unicode + count : int, optional + If the optional argument `count` is given, only the first + `count` occurrences are replaced. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.replace + """ + return _to_string_or_unicode_array( + _vec_string( + a, object_, 'replace', [old, new] +_clean_args(count))) + +def rfind(a, sub, start=0, end=None): + """ + For each element in `a`, return the highest index in the string + where substring `sub` is found, such that `sub` is contained + within [`start`, `end`]. + + Calls `str.rfind` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + sub : str or unicode + start, end : int, optional + Optional arguments `start` and `end` are interpreted as in + slice notation. + + Returns + ------- + out : ndarray + Output array of integers. Return -1 on failure. + + See also + -------- + str.rfind + """ + return _vec_string( + a, integer, 'rfind', [sub, start] + _clean_args(end)) + +def rindex(a, sub, start=0, end=None): + """ + Like `rfind`, but raises `ValueError` when the substring `sub` is + not found. + + Calls `str.rindex` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + sub : str or unicode + start, end : int, optional + + Returns + ------- + out : ndarray + Output array of integers. + + See also + -------- + rfind, str.rindex + """ + return _vec_string( + a, integer, 'rindex', [sub, start] + _clean_args(end)) + +if sys.version_info >= (2, 4): + def rjust(a, width, fillchar=' '): + """ + Return an array with the elements of `a` right-justified in a + string of length `width`. + + Calls `str.rjust` element-wise. 
+ + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + fillchar : str or unicode, optional + The character to use for padding + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.rjust + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'rjust', (width_arr, fillchar)) +else: + def rjust(a, width): + """ + Return an array with the elements of `a` right-justified in a + string of length `width`. + + Calls `str.rjust` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + width : int + The length of the resulting strings + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.rjust + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'rjust', (width,)) + +if sys.version_info >= (2, 5): + def rpartition(a, sep): + """ + Partition each element in `a` around `sep`. + + Calls `str.rpartition` element-wise. + + For each element in `a`, split the element as the last + occurrence of `sep`, and return 3 strings containing the part + before the separator, the separator itself, and the part after + the separator. If the separator is not found, return 3 strings + containing the string itself, followed by two empty strings. + + Parameters + ---------- + a : array-like of str or unicode + sep : str or unicode + + Returns + ------- + out : ndarray + Output array of string or unicode, depending on input + type. The output array will have an extra dimension with + 3 elements per input element. 
+ + See also + -------- + str.rpartition + """ + return _to_string_or_unicode_array( + _vec_string(a, object_, 'rpartition', (sep,))) + +if sys.version_info >= (2, 4): + def rsplit(a, sep=None, maxsplit=None): + """ + For each element in `a`, return a list of the words in the + string, using `sep` as the delimiter string. + + Calls `str.rsplit` element-wise. + + Except for splitting from the right, `rsplit` + behaves like `split`. + + Parameters + ---------- + a : array_like of string_ or unicode_ + sep : str or unicode, optional + If `sep` is not specified or `None`, any whitespace string + is a separator. + maxsplit : int, optional + If `maxsplit` is given, at most `maxsplit` splits are done, + the rightmost ones. + + Returns + ------- + out : ndarray + Array of list objects + + See also + -------- + str.rsplit, split + """ + # This will return an array of lists of different sizes, so we + # leave it as an object array + return _vec_string( + a, object_, 'rsplit', [sep] + _clean_args(maxsplit)) + +def rstrip(a, chars=None): + """ + For each element in `a`, return a copy with the trailing + characters removed. + + Calls `str.rstrip` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + chars : str or unicode, optional + The `chars` argument is a string specifying the set of + characters to be removed. If omitted or None, the `chars` + argument defaults to removing whitespace. The `chars` argument + is not a suffix; rather, all combinations of its values are + stripped. 
+ + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.rstrip + + Examples + -------- + >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7'); c + array(['aAaAaA', 'abBABba'], + dtype='|S7') + >>> np.char.rstrip(c, 'a') + array(['aAaAaA', 'abBABb'], + dtype='|S6') + >>> np.char.rstrip(c, 'A') + array(['aAaAa', 'abBABba'], + dtype='|S7') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'rstrip', (chars,)) + +def split(a, sep=None, maxsplit=None): + """ + For each element in `a`, return a list of the words in the + string, using `sep` as the delimiter string. + + Calls `str.rsplit` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + sep : str or unicode, optional + If `sep` is not specified or `None`, any whitespace string is a + separator. + maxsplit : int, optional + If `maxsplit` is given, at most `maxsplit` splits are done. + + Returns + ------- + out : ndarray + Array of list objects + + See also + -------- + str.split, rsplit + """ + # This will return an array of lists of different sizes, so we + # leave it as an object array + return _vec_string( + a, object_, 'split', [sep] + _clean_args(maxsplit)) + +def splitlines(a, keepends=None): + """ + For each element in `a`, return a list of the lines in the + element, breaking at line boundaries. + + Calls `str.splitlines` element-wise. + + Parameters + ---------- + a : array_like of string_ or unicode_ + keepends : bool, optional + Line breaks are not included in the resulting list unless + keepends is given and true. + + Returns + ------- + out : ndarray + Array of list objects + + See also + -------- + str.splitlines + """ + return _vec_string( + a, object_, 'splitlines', _clean_args(keepends)) + +def startswith(a, prefix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `a` starts with `prefix`, otherwise `False`. 
+ + Calls `str.startswith` element-wise. + + Parameters + ---------- + a : array_like of string or unicode + suffix : str + start, end : int, optional + end : int, optional + With optional `start`, test beginning at that position. With + optional `end`, stop comparing at that position. + + Returns + ------- + out : ndarray + Array of booleans + + See also + -------- + str.startswith + """ + return _vec_string( + a, bool_, 'startswith', [prefix, start] + _clean_args(end)) + +def strip(a, chars=None): + """ + For each element in `a`, return a copy with the leading and + trailing characters removed. + + Calls `str.rstrip` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + chars : str or unicode, optional + The `chars` argument is a string specifying the set of + characters to be removed. If omitted or None, the `chars` + argument defaults to removing whitespace. The `chars` argument + is not a prefix or suffix; rather, all combinations of its + values are stripped. + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.strip + + Examples + -------- + >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']) + >>> c + array(['aAaAaA', ' aA', 'abBABba'], + dtype='|S7') + >>> np.char.strip(c) + array(['aAaAaA', 'aA', 'abBABba'], + dtype='|S7') + >>> np.char.strip(c, 'a') # 'a' unstripped from c[1] because whitespace leads + array(['AaAaA', ' aA', 'bBABb'], + dtype='|S6') + >>> np.char.strip(c, 'A') # 'A' unstripped from c[1] because (unprinted) ws trails + array(['aAaAa', ' aA', 'abBABba'], + dtype='|S7') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'strip', _clean_args(chars)) + +def swapcase(a): + """ + For each element in `a`, return a copy of the string with + uppercase characters converted to lowercase and vice versa. + + Calls `str.swapcase` element-wise. + + For 8-bit strings, this method is locale-dependent. 
+ + Parameters + ---------- + a : array-like of str or unicode + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.swapcase + + Examples + -------- + >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5'); c + array(['a1B c', '1b Ca', 'b Ca1', 'cA1b'], + dtype='|S5') + >>> np.char.swapcase(c) + array(['A1b C', '1B cA', 'B cA1', 'Ca1B'], + dtype='|S5') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'swapcase') + +def title(a): + """ + For each element in `a`, return a titlecased version of the + string: words start with uppercase characters, all remaining cased + characters are lowercase. + + Calls `str.title` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array-like of str or unicode + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.title + + Examples + -------- + >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5'); c + array(['a1b c', '1b ca', 'b ca1', 'ca1b'], + dtype='|S5') + >>> np.char.title(c) + chararray(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'], + dtype='|S5') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'title') + +def translate(a, table, deletechars=None): + """ + For each element in `a`, return a copy of the string where all + characters occurring in the optional argument `deletechars` are + removed, and the remaining characters have been mapped through the + given translation table. + + Calls `str.translate` element-wise. 
+ + Parameters + ---------- + a : array-like of str or unicode + table : str of length 256 + deletechars : str + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.translate + """ + a_arr = numpy.asarray(a) + if issubclass(a_arr.dtype.type, unicode_): + return _vec_string( + a_arr, a_arr.dtype, 'translate', (table,)) + else: + return _vec_string( + a_arr, a_arr.dtype, 'translate', [table] + _clean_args(deletechars)) + +def upper(a): + """ + Return an array with the elements of `a` converted to uppercase. + + Calls `str.upper` element-wise. + + For 8-bit strings, this method is locale-dependent. + + Parameters + ---------- + a : array-like of str or unicode + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.upper + + Examples + -------- + >>> c = np.array(['a1b c', '1bca', 'bca1']); c + array(['a1b c', '1bca', 'bca1'], + dtype='|S5') + >>> numpy.char.upper(c) + array(['A1B C', '1BCA', 'BCA1'], + dtype='|S5') + """ + a_arr = numpy.asarray(a) + return _vec_string(a_arr, a_arr.dtype, 'upper') + +def zfill(a, width): + """ + Return the numeric string left-filled with zeros in a string of + length `width`. + + Calls `str.zfill` element-wise. + + Parameters + ---------- + a : array-like of str or unicode + width : int + + Returns + ------- + out : ndarray + Output array of str or unicode, depending on input type + + See also + -------- + str.zfill + """ + a_arr = numpy.asarray(a) + width_arr = numpy.asarray(width) + size = long(numpy.max(width_arr.flat)) + return _vec_string( + a_arr, (a_arr.dtype.type, size), 'zfill', (width_arr,)) + +def isnumeric(a): + """ + For each element in `a`, return True if there are only numeric + characters in the element. + + Calls `unicode.isnumeric` element-wise. + + Numeric characters include digit characters, and all characters + that have the Unicode numeric value property, e.g. 
``U+2155, + VULGAR FRACTION ONE FIFTH``. + + Parameters + ---------- + a : array-like of unicode + + Returns + ------- + out : ndarray + Array of booleans + + See also + -------- + unicode.isnumeric + """ + if _use_unicode(a) != unicode_: + raise TypeError, "isnumeric is only available for Unicode strings and arrays" + return _vec_string(a, bool_, 'isnumeric') + +def isdecimal(a): + """ + For each element in `a`, return True if there are only decimal + characters in the element. + + Calls `unicode.isdecimal` element-wise. + + Decimal characters include digit characters, and all characters + that that can be used to form decimal-radix numbers, + e.g. ``U+0660, ARABIC-INDIC DIGIT ZERO``. + + Parameters + ---------- + a : array-like of unicode + + Returns + ------- + out : ndarray + Array of booleans + + See also + -------- + unicode.isdecimal + """ + if _use_unicode(a) != unicode_: + raise TypeError, "isnumeric is only available for Unicode strings and arrays" + return _vec_string(a, bool_, 'isdecimal') -# It also strips white-space on element retrieval and on -# comparisons class chararray(ndarray): """ chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0, strides=None, order=None) - An array of fixed size (perhaps unicode) strings. + Provides a convenient view on arrays of string and unicode values. .. note:: - The chararray module exists for backwards compatibility with Numarray, - it is not recommended for new development. If one needs arrays of - strings, use arrays of `dtype` object. + This class is provided for numarray backward-compatibility. + New code (not concerned with numarray compatibility) should use + arrays of type object_, string_ or unicode_ and use the free + functions in :mod:`numpy.char <numpy.core.defchararray>` for + fast vectorized string operations instead. 
+ + Versus a regular Numpy array of type 'string_' or 'unicode_', this + class adds the following functionality: - Create the array, using `buffer` (with `offset` and `strides`) if it is - not ``None``. If `buffer` is ``None``, then construct a new array with - `strides` in "C order," unless both ``len(shape) >= 2`` and - ``order='Fortran'``, in which case `strides` is in "Fortran order." + + 1) values automatically have whitespace removed from the end + when indexed + + 2) comparison operators automatically remove whitespace from the + end when comparing values + + 3) vectorized string operations are provided as methods + (e.g. `.endswith`) and infix operators (e.g. +, *, %) + + chararrays should be created using `numpy.char.array + <numpy.core.defchararray.array>` or `numpy.char.asarray + <numpy.core.defchararray.asarray>`, rather than this constructor + directly. + + Create the array, using `buffer` (with `offset` and `strides`) if + it is not ``None``. If `buffer` is ``None``, then construct a new + array with `strides` in "C order," unless both ``len(shape) >= 2`` + and ``order='Fortran'``, in which case `strides` is in "Fortran + order." Parameters ---------- shape : tuple - Shape of the array. + Shape of the array. itemsize : int_like, > 0, optional - Length of each array element, in number of characters. Default is 1. + Length of each array element, in number of characters. Default is 1. unicode : {True, False}, optional - Are the array elements of unicode-type (``True``) or string-type - (``False``, the default). + Are the array elements of unicode-type (``True``) or string-type + (``False``, the default). buffer : integer, > 0, optional - Memory address of the start of the array data. If ``None`` (the - default), a new array is created. + Memory address of the start of the array data. If ``None`` + (the default), a new array is created. offset : integer, >= 0, optional - Fixed stride displacement from the beginning of an axis? Default is - 0. 
+ Fixed stride displacement from the beginning of an axis? + Default is 0. strides : array_like(?), optional - Strides for the array (see `numpy.ndarray.strides` for full - description), default is ``None``. + Strides for the array (see `numpy.ndarray.strides` for full + description), default is ``None``. order : {'C', 'F'}, optional - The order in which the array data is stored in memory: 'C' -> "row - major" order (the default), 'F' -> "column major" (Fortran) order + The order in which the array data is stored in memory: 'C' -> + "row major" order (the default), 'F' -> "column major" + (Fortran) order. Examples -------- @@ -87,7 +1684,6 @@ class chararray(ndarray): ['abc', 'abc', 'abc'], ['abc', 'abc', 'abc']], dtype='|S5') - """ def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None, offset=0, strides=None, order='C'): @@ -98,6 +1694,11 @@ class chararray(ndarray): else: dtype = string_ + # force itemsize to be a Python long, since using Numpy integer + # types results in itemsize.itemsize being used as the size of + # strings in the new array. + itemsize = long(itemsize) + _globalvar = 1 if buffer is None: self = ndarray.__new__(subtype, shape, (dtype, itemsize), @@ -117,87 +1718,134 @@ class chararray(ndarray): def __getitem__(self, obj): val = ndarray.__getitem__(self, obj) - if isinstance(val, (string_, unicode_)): + if issubclass(val.dtype.type, character): temp = val.rstrip() - if len(temp) == 0: + if _len(temp) == 0: val = '' else: val = temp return val + # IMPLEMENTATION NOTE: Most of the methods of this class are + # direct delegations to the free functions in this module. + # However, those that return an array of strings should instead + # return a chararray, so some extra wrapping is required. + def __eq__(self, other): - return compare_chararrays(self, other, '==', True) + """ + Return (self == other) element-wise. 
+ + See also + -------- + equal + """ + return equal(self, other) def __ne__(self, other): - return compare_chararrays(self, other, '!=', True) + """ + Return (self != other) element-wise. + + See also + -------- + not_equal + """ + return not_equal(self, other) def __ge__(self, other): - return compare_chararrays(self, other, '>=', True) + """ + Return (self >= other) element-wise. + + See also + -------- + greater_equal + """ + return greater_equal(self, other) def __le__(self, other): - return compare_chararrays(self, other, '<=', True) + """ + Return (self <= other) element-wise. + + See also + -------- + less_equal + """ + return less_equal(self, other) def __gt__(self, other): - return compare_chararrays(self, other, '>', True) + """ + Return (self > other) element-wise. + + See also + -------- + greater + """ + return greater(self, other) def __lt__(self, other): - return compare_chararrays(self, other, '<', True) + """ + Return (self < other) element-wise. + + See also + -------- + less + """ + return less(self, other) def __add__(self, other): - b = broadcast(self, other) - arr = b.iters[1].base - outitem = self.itemsize + arr.itemsize - result = chararray(b.shape, outitem, self.dtype is unicode_) - res = result.flat - for k, val in enumerate(b): - res[k] = (val[0] + val[1]) - return result + """ + Return (self + other), that is string concatenation, + element-wise for a pair of array_likes of string_ or unicode_. 
+ + See also + -------- + add + """ + return asarray(add(self, other)) def __radd__(self, other): - b = broadcast(other, self) - outitem = b.iters[0].base.itemsize + \ - b.iters[1].base.itemsize - result = chararray(b.shape, outitem, self.dtype is unicode_) - res = result.flat - for k, val in enumerate(b): - res[k] = (val[0] + val[1]) - return result - - def __mul__(self, other): - b = broadcast(self, other) - arr = b.iters[1].base - if not issubclass(arr.dtype.type, integer): - raise ValueError, "Can only multiply by integers" - outitem = b.iters[0].base.itemsize * arr.max() - result = chararray(b.shape, outitem, self.dtype is unicode_) - res = result.flat - for k, val in enumerate(b): - res[k] = val[0]*val[1] - return result - - def __rmul__(self, other): - b = broadcast(self, other) - arr = b.iters[1].base - if not issubclass(arr.dtype.type, integer): - raise ValueError, "Can only multiply by integers" - outitem = b.iters[0].base.itemsize * arr.max() - result = chararray(b.shape, outitem, self.dtype is unicode_) - res = result.flat - for k, val in enumerate(b): - res[k] = val[0]*val[1] - return result - - def __mod__(self, other): - b = broadcast(self, other) - res = [None]*b.size - maxsize = -1 - for k,val in enumerate(b): - newval = val[0] % val[1] - maxsize = max(len(newval), maxsize) - res[k] = newval - newarr = chararray(b.shape, maxsize, self.dtype is unicode_) - newarr[:] = res - return newarr + """ + Return (other + self), that is string concatenation, + element-wise for a pair of array_likes of string_ or unicode_. + + See also + -------- + add + """ + return asarray(add(numpy.asarray(other), self)) + + def __mul__(self, i): + """ + Return (self * i), that is string multiple concatenation, + element-wise. + + See also + -------- + multiply + """ + return asarray(multiply(self, i)) + + def __rmul__(self, i): + """ + Return (self * i), that is string multiple concatenation, + element-wise. 
+ + See also + -------- + multiply + """ + return asarray(multiply(self, i)) + + def __mod__(self, i): + """ + Return (self % i), that is pre-Python 2.6 string formatting + (iterpolation), element-wise for a pair of array_likes of string_ + or unicode_. + + See also + -------- + mod + """ + return asarray(mod(self, i)) def __rmod__(self, other): return NotImplemented @@ -221,587 +1869,659 @@ class chararray(ndarray): """ return self.__array__().argsort(axis, kind, order) - - def _generalmethod(self, name, myiter): - res = [None]*myiter.size - maxsize = -1 - for k, val in enumerate(myiter): - newval = [] - for chk in val[1:]: - if not chk or (chk.dtype is object_ and chk.item() is None): - break - newval.append(chk) - newitem = getattr(val[0],name)(*newval) - maxsize = max(len(newitem), maxsize) - res[k] = newitem - newarr = chararray(myiter.shape, maxsize, self.dtype is unicode_) - newarr[:] = res - return newarr - - def _typedmethod(self, name, myiter, dtype): - result = empty(myiter.shape, dtype=dtype) - res = result.flat - for k, val in enumerate(myiter): - newval = [] - for chk in val[1:]: - if not chk or (chk.dtype is object_ and chk.item() is None): - break - newval.append(chk) - this_str = val[0].rstrip('\x00') - newitem = getattr(this_str,name)(*newval) - res[k] = newitem - return result - - def _samemethod(self, name): - result = self.copy() - res = result.flat - for k, val in enumerate(self.flat): - res[k] = getattr(val, name)() - return result + argsort.__doc__ = ndarray.argsort.__doc__ def capitalize(self): """ - Capitalize the first character of each array element. - - For each element of `self`, if the first character is a letter - possessing both "upper-case" and "lower-case" forms, and it is - presently in lower-case, change it to upper-case; otherwise, leave - it untouched. - - Parameters - ---------- - None - - Returns - ------- - ret : chararray - `self` with each element "title-cased." 
+ Return a copy of `self` with only the first character of each element + capitalized. - Examples + See also -------- - >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4').view(np.chararray); c - chararray(['a1b2', '1b2a', 'b2a1', '2a1b'], - dtype='|S4') - >>> c.capitalize() - chararray(['A1b2', '1b2a', 'B2a1', '2a1b'], - dtype='|S4') - + capitalize """ - return self._samemethod('capitalize') + return asarray(capitalize(self)) - if sys.version[:3] >= '2.4': + if sys.version_info >= (2, 4): def center(self, width, fillchar=' '): - return self._generalmethod('center', - broadcast(self, width, fillchar)) - def ljust(self, width, fillchar=' '): - return self._generalmethod('ljust', - broadcast(self, width, fillchar)) - def rjust(self, width, fillchar=' '): - return self._generalmethod('rjust', - broadcast(self, width, fillchar)) - def rsplit(self, sep=None, maxsplit=None): - return self._typedmethod('rsplit', broadcast(self, sep, maxsplit), - object) + """ + Return a copy of `self` with its elements centered in a + string of length `width`. + + See also + -------- + center + """ + return asarray(center(self, width, fillchar)) else: - def ljust(self, width): - return self._generalmethod('ljust', broadcast(self, width)) - def rjust(self, width): - return self._generalmethod('rjust', broadcast(self, width)) def center(self, width): - return self._generalmethod('center', broadcast(self, width)) + """ + Return a copy of `self` with its elements centered in a + string of length `width`. - def count(self, sub, start=None, end=None): - """ - Return the number of occurrences of a sub-string in each array element. + See also + -------- + center + """ + return asarray(center(self, width)) - Parameters - ---------- - sub : string - The sub-string to count. - start : int, optional - The string index at which to start counting in each element. - end : int, optional - The string index at which to end counting in each element. 
- - Returns - ------- - ret : ndarray of ints - Array whose elements are the number of occurrences of `sub` in each - element of `self`. + def count(self, sub, start=0, end=None): + """ + Returns an array with the number of non-overlapping occurrences of + substring `sub` in the range [`start`, `end`]. - Examples + See also -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray) - >>> c - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> c.count('A') - array([3, 1, 1]) - >>> c.count('aA') - array([3, 1, 0]) - >>> c.count('A', start=1, end=4) - array([2, 1, 1]) - >>> c.count('A', start=1, end=3) - array([1, 0, 0]) - + count """ - return self._typedmethod('count', broadcast(self, sub, start, end), int) + return count(self, sub, start, end) + - def decode(self,encoding=None,errors=None): + def decode(self, encoding=None, errors=None): """ - Return elements decoded according to the value of `encoding`. + Calls `str.decode` element-wise. - Parameters - ---------- - encoding : string, optional - The encoding to use; for a list of acceptable values, see the - Python docstring for the package 'encodings' - error : Python exception object?, optional - The exception to raise if decoding fails? + See also + -------- + decode + """ + return decode(self, encoding, errors) - Returns - ------- - ret : chararray - A view of `self`, suitably decoded. + def encode(self, encoding=None, errors=None): + """ + Calls `str.encode` element-wise. 
- See Also + See also -------- encode - encodings - (package) - - Examples - -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray) - >>> c - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> c = c.encode(encoding='cp037'); c - chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@', - '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'], - dtype='|S7') - >>> c.decode(encoding='cp037') - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - """ - return self._generalmethod('decode', broadcast(self, encoding, errors)) + return encode(self, encoding, errors) - def encode(self,encoding=None,errors=None): + def endswith(self, suffix, start=0, end=None): """ - Return elements encoded according to the value of `encoding`. - - Parameters - ---------- - encoding : string, optional - The encoding to use; for a list of acceptable values, see the - Python docstring for `encodings`. - error : Python exception object, optional - The exception to raise if encoding fails. + Returns a boolean array which is `True` where the string element + in `self` ends with `suffix`, otherwise `False`. - Returns - ------- - ret : chararray - A view of `self`, suitably encoded. 
- - See Also - -------- - decode - - Examples + See also -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray) - >>> c - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> c.encode(encoding='cp037') - chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@', - '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'], - dtype='|S7') - + endswith """ - return self._generalmethod('encode', broadcast(self, encoding, errors)) + return endswith(self, suffix, start, end) - def endswith(self, suffix, start=None, end=None): + def expandtabs(self, tabsize=8): """ - Check whether elements end with specified suffix - - Given an array of strings, return a new bool array of same shape with - the result of comparing suffix against each element; each element - of bool array is ``True`` if element ends with specified suffix and - ``False`` otherwise. + Return a copy of each string element where all tab characters are + replaced by one or more spaces. - Parameters - ---------- - suffix : string - Compare each element in array to this. - start : int, optional - For each element, start searching from this position. - end : int, optional - For each element, stop comparing at this position. + See also + -------- + expandtabs + """ + return asarray(expandtabs(self, tabsize)) - Returns - ------- - endswith : ndarray - Output array of bools + def find(self, sub, start=0, end=None): + """ + For each element, return the lowest index in the string where + substring `sub` is found. 
- See Also + See also -------- - count find - index - startswith - - Examples - -------- - >>> s = chararray(3, itemsize=3) - >>> s[0] = 'foo' - >>> s[1] = 'bar' - >>> s - chararray(['foo', 'bar'], - dtype='|S3') - >>> s.endswith('ar') - array([False, True], dtype=bool) - >>> s.endswith('a', start=1, end=2) - array([False, True], dtype=bool) - """ - return self._typedmethod('endswith', broadcast(self, suffix, start, end), bool) - - def expandtabs(self, tabsize=None): - return self._generalmethod('endswith', broadcast(self, tabsize)) - - def find(self, sub, start=None, end=None): - return self._typedmethod('find', broadcast(self, sub, start, end), int) + return find(self, sub, start, end) - def index(self, sub, start=None, end=None): - return self._typedmethod('index', broadcast(self, sub, start, end), int) + def index(self, sub, start=0, end=None): + """ + Like `find`, but raises `ValueError` when the substring is not found. - def _ismethod(self, name): - result = empty(self.shape, dtype=bool) - res = result.flat - for k, val in enumerate(self.flat): - item = val.rstrip('\x00') - res[k] = getattr(item, name)() - return result + See also + -------- + index + """ + return index(self, sub, start, end) def isalnum(self): - return self._ismethod('isalnum') + """ + Returns true for each element if all characters in the string + are alphanumeric and there is at least one character, false + otherwise. + + See also + -------- + isalnum + """ + return isalnum(self) def isalpha(self): - return self._ismethod('isalpha') + """ + Returns true for each element if all characters in the string + are alphabetic and there is at least one character, false + otherwise. + + See also + -------- + isalpha + """ + return isalpha(self) def isdigit(self): - return self._ismethod('isdigit') + """ + Returns true for each element if all characters in the string are + digits and there is at least one character, false otherwise. 
+ + See also + -------- + isdigit + """ + return isdigit(self) def islower(self): - return self._ismethod('islower') + """ + Returns true for each element if all cased characters in the + string are lowercase and there is at least one cased character, + false otherwise. + + See also + -------- + islower + """ + return islower(self) def isspace(self): - return self._ismethod('isspace') + """ + Returns true for each element if there are only whitespace + characters in the string and there is at least one character, + false otherwise. + + See also + -------- + isspace + """ + return isspace(self) def istitle(self): - return self._ismethod('istitle') + """ + Returns true for each element if the element is a titlecased + string and there is at least one character, false otherwise. + + See also + -------- + istitle + """ + return istitle(self) def isupper(self): - return self._ismethod('isupper') + """ + Returns true for each element if all cased characters in the + string are uppercase and there is at least one character, false + otherwise. - def join(self, seq): - return self._generalmethod('join', broadcast(self, seq)) + See also + -------- + isupper + """ + return isupper(self) - def lower(self): + def join(self, seq): """ - Assure that every character of each array element is lower-case. + Return a string which is the concatenation of the strings in the + sequence `seq`. - For each character possessing both "upper-case" and "lower-case" forms, - if it is in upper-case, change it to lower; otherwise, leave it unchanged. + See also + -------- + join + """ + return join(self, seq) - Parameters - ---------- - None + if sys.version_info >= (2, 4): + def ljust(self, width, fillchar=' '): + """ + Return an array with the elements of `self` left-justified in a + string of length `width`. 
+ + See also + -------- + ljust + """ + return asarray(ljust(self, width, fillchar)) + else: + def ljust(self, width): + """ + Return an array with the elements of `self` left-justified in a + string of length `width`. - Returns - ------- - ret : chararray - `self` with all capital letters changed to lower-case. + See also + -------- + ljust + """ + return asarray(ljust(self, width)) - Examples + def lower(self): + """ + Return an array with the elements of `self` converted to + lowercase. + See also -------- - >>> c = np.array(['A1B C', '1BCA', 'BCA1']).view(np.chararray); c - chararray(['A1B C', '1BCA', 'BCA1'], - dtype='|S5') - >>> c.lower() - chararray(['a1b c', '1bca', 'bca1'], - dtype='|S5') - + lower """ - return self._samemethod('lower') + return asarray(lower(self)) - def lstrip(self, chars): + def lstrip(self, chars=None): """ - Remove leading characters from each element. + For each element in `self`, return a copy with the leading characters + removed. - Returns a view of ``self`` with `chars` stripped from the start of - each element. Note: **No Default** - `chars` must be specified (but if - it is explicitly ``None`` or the empty string '', leading whitespace is - removed). + See also + -------- + lstrip + """ + return asarray(lstrip(self, chars)) - Parameters - ---------- - chars : string_like or None - Character(s) to strip; whitespace stripped if `chars` == ``None`` - or `chars` == ''. + if sys.version_info >= (2, 5): + def partition(self, sep): + """ + Partition each element in `self` around `sep`. - Returns - ------- - ret : chararray - View of ``self``, each element suitably stripped. + See also + -------- + partition + """ + return asarray(partition(self, sep)) - Raises - ------ - TypeError: lstrip() takes exactly 2 arguments (1 given) - If `chars` is not supplied. + def replace(self, old, new, count=None): + """ + For each element in `self`, return a copy of the string with all + occurrences of substring `old` replaced by `new`. 
- Examples + See also -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray) - >>> c - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> c.lstrip('a') # 'a' unstripped from c[1] because whitespace leading - chararray(['AaAaA', ' aA', 'bBABba'], - dtype='|S6') - >>> c.lstrip('A') # leaves c unchanged - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> (c.lstrip(' ') == c.lstrip('')).all() - True - >>> (c.lstrip(' ') == c.lstrip(None)).all() - True - + replace """ - return self._generalmethod('lstrip', broadcast(self, chars)) + return asarray(replace(self, old, new, count)) - def replace(self, old, new, count=None): - return self._generalmethod('replace', broadcast(self, old, new, count)) + def rfind(self, sub, start=0, end=None): + """ + For each element in `self`, return the highest index in the string + where substring `sub` is found, such that `sub` is contained + within [`start`, `end`]. - def rfind(self, sub, start=None, end=None): - return self._typedmethod('rfind', broadcast(self, sub, start, end), int) + See also + -------- + rfind + """ + return rfind(self, sub, start, end) - def rindex(self, sub, start=None, end=None): - return self._typedmethod('rindex', broadcast(self, sub, start, end), int) + def rindex(self, sub, start=0, end=None): + """ + Like `rfind`, but raises `ValueError` when the substring `sub` is + not found. - def rstrip(self, chars=None): + See also + -------- + rindex """ - Remove trailing characters. + return rindex(self, sub, start, end) - Returns a view of ``self`` with `chars` stripped from the end of each - element. + if sys.version_info >= (2, 4): + def rjust(self, width, fillchar=' '): + """ + Return an array with the elements of `self` + right-justified in a string of length `width`. 
+ + See also + -------- + rjust + """ + return asarray(rjust(self, width, fillchar)) + else: + def rjust(self, width): + """ + Return an array with the elements of `self` + right-justified in a string of length `width`. + + See also + -------- + rjust + """ + return asarray(rjust(self, width)) + + if sys.version_info >= (2, 5): + def rpartition(self, sep): + """ + Partition each element in `self` around `sep`. + + See also + -------- + rpartition + """ + return asarray(rpartition(self, sep)) + + if sys.version_info >= (2, 4): + def rsplit(self, sep=None, maxsplit=None): + """ + For each element in `self`, return a list of the words in + the string, using `sep` as the delimiter string. - Parameters - ---------- - chars : string_like, optional, default=None - Character(s) to remove. + See also + -------- + rsplit + """ + return rsplit(self, sep, maxsplit) - Returns - ------- - ret : chararray - View of ``self``, each element suitably stripped. + def rstrip(self, chars=None): + """ + For each element in `self`, return a copy with the trailing + characters removed. - Examples + See also -------- - >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7').view(np.chararray); c - chararray(['aAaAaA', 'abBABba'], - dtype='|S7') - >>> c.rstrip('a') - chararray(['aAaAaA', 'abBABb'], - dtype='|S6') - >>> c.rstrip('A') - chararray(['aAaAa', 'abBABba'], - dtype='|S7') - + rstrip """ - return self._generalmethod('rstrip', broadcast(self, chars)) + return asarray(rstrip(self, chars)) def split(self, sep=None, maxsplit=None): - return self._typedmethod('split', broadcast(self, sep, maxsplit), object) + """ + For each element in `self`, return a list of the words in the + string, using `sep` as the delimiter string. 
- def splitlines(self, keepends=None): - return self._typedmethod('splitlines', broadcast(self, keepends), object) + See also + -------- + split + """ + return split(self, sep, maxsplit) - def startswith(self, prefix, start=None, end=None): - return self._typedmethod('startswith', broadcast(self, prefix, start, end), bool) + def splitlines(self, keepends=None): + """ + For each element in `self`, return a list of the lines in the + element, breaking at line boundaries. - def strip(self, chars=None): + See also + -------- + splitlines """ - Remove leading and trailing characters, whitespace by default. + return splitlines(self, keepends) - Returns a view of ``self`` with `chars` stripped from the start and end of - each element; by default leading and trailing whitespace is removed. + def startswith(self, prefix, start=0, end=None): + """ + Returns a boolean array which is `True` where the string element + in `self` starts with `prefix`, otherwise `False`. - Parameters - ---------- - chars : string_like, optional, default=None - Character(s) to strip; whitespace by default. + See also + -------- + startswith + """ + return startswith(self, prefix, start, end) - Returns - ------- - ret : chararray - View of ``self``, each element suitably stripped. + def strip(self, chars=None): + """ + For each element in `self`, return a copy with the leading and + trailing characters removed. 
- Examples + See also -------- - >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray) - >>> c - chararray(['aAaAaA', ' aA', 'abBABba'], - dtype='|S7') - >>> c.strip() - chararray(['aAaAaA', 'aA', 'abBABba'], - dtype='|S7') - >>> c.strip('a') # 'a' unstripped from c[1] because whitespace leads - chararray(['AaAaA', ' aA', 'bBABb'], - dtype='|S6') - >>> c.strip('A') # 'A' unstripped from c[1] because (unprinted) ws trails - chararray(['aAaAa', ' aA', 'abBABba'], - dtype='|S7') - + strip """ - return self._generalmethod('strip', broadcast(self, chars)) + return asarray(strip(self, chars)) def swapcase(self): """ - Switch upper-case letters to lower-case, and vice-versa. + For each element in `self`, return a copy of the string with + uppercase characters converted to lowercase and vice versa. - Parameters - ---------- - None + See also + -------- + swapcase + """ + return asarray(swapcase(self)) - Returns - ------- - ret : chararray - `self` with all lower-case letters capitalized and all upper-case - changed to lower case. + def title(self): + """ + For each element in `self`, return a titlecased version of the + string: words start with uppercase characters, all remaining cased + characters are lowercase. - Examples + See also -------- - >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5').view(np.chararray);c - chararray(['a1B c', '1b Ca', 'b Ca1', 'cA1b'], - dtype='|S5') - >>> c.swapcase() - chararray(['A1b C', '1B cA', 'B cA1', 'Ca1B'], - dtype='|S5') + title + """ + return asarray(title(self)) + def translate(self, table, deletechars=None): """ - return self._samemethod('swapcase') + For each element in `self`, return a copy of the string where + all characters occurring in the optional argument + `deletechars` are removed, and the remaining characters have + been mapped through the given translation table. - def title(self): + See also + -------- + translate """ - Capitalize the first character of each array element. 
+ return asarray(translate(self, table, deletechars)) - For each element of `self`, if the first character is a letter - possessing both "upper-case" and "lower-case" forms, and it is - presently in lower-case, change it to upper-case; otherwise, leave - it untouched. + def upper(self): + """ + Return an array with the elements of `self` converted to + uppercase. - Parameters - ---------- - None + See also + -------- + upper + """ + return asarray(upper(self)) - Returns - ------- - ret : chararray - `self` with + def zfill(self, width): + """ + Return the numeric string left-filled with zeros in a string of + length `width`. - Examples + See also -------- - >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5').view(np.chararray);c - chararray(['a1b c', '1b ca', 'b ca1', 'ca1b'], - dtype='|S5') - >>> c.title() - chararray(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'], - dtype='|S5') + zfill + """ + return asarray(zfill(self, width)) + def isnumeric(self): """ - return self._samemethod('title') + For each element in `self`, return True if there are only + numeric characters in the element. - def translate(self, table, deletechars=None): - if self.dtype is unicode_: - return self._generalmethod('translate', broadcast(self, table)) - else: - return self._generalmethod('translate', broadcast(self, table, deletechars)) + See also + -------- + isnumeric + """ + return isnumeric(self) - def upper(self): + def isdecimal(self): """ - Capitalize every character of each array element. + For each element in `self`, return True if there are only + decimal characters in the element. - For each character possessing both "upper-case" and "lower-case" forms, - if it is in lower-case, change it to upper; otherwise, leave it unchanged. + See also + -------- + isdecimal + """ + return isdecimal(self) - Parameters - ---------- - None - Returns - ------- - ret : chararray - `self` with all characters capitalized. 
+def array(obj, itemsize=None, copy=True, unicode=None, order=None): + """ + Create a `chararray`. - Examples - -------- - >>> c = np.array(['a1b c', '1bca', 'bca1']).view(np.chararray); c - chararray(['a1b c', '1bca', 'bca1'], - dtype='|S5') - >>> c.upper() - chararray(['A1B C', '1BCA', 'BCA1'], - dtype='|S5') + .. note:: + This class is provided for numarray backward-compatibility. + New code (not concerned with numarray compatibility) should use + arrays of type object_, string_ or unicode_ and use the free + functions in :mod:`numpy.char <numpy.core.defchararray>` for + fast vectorized string operations instead. - """ - return self._samemethod('upper') + Versus a regular Numpy array of type `string_` or `unicode_`, this + class adds the following functionality: - def zfill(self, width): - return self._generalmethod('zfill', broadcast(self, width)) + 1) values automatically have whitespace removed from the end + when indexed + 2) comparison operators automatically remove whitespace from the + end when comparing values -def array(obj, itemsize=None, copy=True, unicode=False, order=None): + 3) vectorized string operations are provided as methods + (e.g. `str.endswith`) and infix operators (e.g. +, *, %) + + Parameters + ---------- + obj : array of str or unicode-like + + itemsize : int, optional + `itemsize` is the number of characters per scalar in the + resulting array. If `itemsize` is None, and `obj` is an + object array or a Python list, the `itemsize` will be + automatically determined. If `itemsize` is provided and `obj` + is of type `str` or `unicode`, then the `obj` string will be + chunked into `itemsize` pieces. + + copy : bool, optional + If true (default), then the object is copied. Otherwise, a copy + will only be made if __array__ returns a copy, if obj is a + nested sequence, or if a copy is needed to satisfy any of the other + requirements (`itemsize`, `unicode`, `order`, etc.). 
+ + unicode : bool, optional + When true, the resulting `chararray` can contain Unicode + characters, when false only 8-bit characters. If `unicode` is + `None` and `obj` is one of the following: + + - a `chararray`, + - an ndarray of type `string_` or `unicode_` + - a Python str or unicode object, + + then the unicode setting of the output array will be + automatically determined. + + order : {'C', 'F', 'A'}, optional + Specify the order of the array. If order is 'C' (default), then the + array will be in C-contiguous order (last-index varies the + fastest). If order is 'F', then the returned array + will be in Fortran-contiguous order (first-index varies the + fastest). If order is 'A', then the returned array may + be in any order (either C-, Fortran-contiguous, or even + discontiguous). + """ + if isinstance(obj, (str, _unicode)): + if unicode is None: + if isinstance(obj, _unicode): + unicode = True + else: + unicode = False + if itemsize is None: + itemsize = _len(obj) + shape = _len(obj) / itemsize + return chararray(shape, itemsize=itemsize, unicode=unicode, + buffer=obj, order=order) + + if isinstance(obj, (list, tuple)): + obj = numpy.asarray(obj) + + if isinstance(obj, ndarray) and issubclass(obj.dtype.type, character): + # If we just have a vanilla chararray, create a chararray + # view around it. 
+ if not isinstance(obj, chararray): + obj = obj.view(chararray) - if isinstance(obj, chararray): if itemsize is None: itemsize = obj.itemsize - if copy or (itemsize != obj.itemsize) \ - or (not unicode and obj.dtype == unicode_) \ - or (unicode and obj.dtype == string_): - return obj.astype("%s%d" % (obj.dtype.char, itemsize)) + # itemsize is in 8-bit chars, so for Unicode, we need + # to divide by the size of a single Unicode character, + # which for Numpy is always 4 + if issubclass(obj.dtype.type, unicode_): + itemsize /= 4 + + if unicode is None: + if issubclass(obj.dtype.type, unicode_): + unicode = True + else: + unicode = False + + if unicode: + dtype = unicode_ else: - return obj + dtype = string_ - if isinstance(obj, ndarray) and (obj.dtype in [unicode_, string_]): - new = obj.view(chararray) - if unicode and obj.dtype == string_: - return new.astype((unicode_, obj.itemsize)) - elif obj.dtype == unicode_: - return new.astype((string_, obj.itemsize)) + if order is not None: + obj = numpy.asarray(obj, order=order) + if (copy + or (itemsize != obj.itemsize) + or (not unicode and isinstance(obj, unicode_)) + or (unicode and isinstance(obj, string_))): + obj = obj.astype((dtype, long(itemsize))) + return obj - if copy: return new.copy() - else: return new + if isinstance(obj, ndarray) and issubclass(obj.dtype.type, object): + if itemsize is None: + # Since no itemsize was specified, convert the input array to + # a list so the ndarray constructor will automatically + # determine the itemsize for us. 
+ obj = obj.tolist() + # Fall through to the default case + + if unicode: + dtype = unicode_ + else: + dtype = string_ - if unicode: dtype = "U" - else: dtype = "S" + if itemsize is None: + val = narray(obj, dtype=dtype, order=order, subok=True) + else: + val = narray(obj, dtype=(dtype, itemsize), order=order, subok=True) + return val.view(chararray) - if itemsize is not None: - dtype += str(itemsize) - if isinstance(obj, (str, _unicode)): - if itemsize is None: - itemsize = len(obj) - shape = len(obj) / itemsize - return chararray(shape, itemsize=itemsize, unicode=unicode, - buffer=obj) +def asarray(obj, itemsize=None, unicode=None, order=None): + """ + Convert the input to a `chararray`, copying the data only if + necessary. - # default - val = narray(obj, dtype=dtype, order=order, subok=1) + Versus a regular Numpy array of type `string_` or `unicode_`, this + class adds the following functionality: - return val.view(chararray) + 1) values automatically have whitespace removed from the end + when indexed + + 2) comparison operators automatically remove whitespace from the + end when comparing values + + 3) vectorized string operations are provided as methods + (e.g. `str.endswith`) and infix operators (e.g. +, *, %) + + Parameters + ---------- + obj : array of str or unicode-like -def asarray(obj, itemsize=None, unicode=False, order=None): + itemsize : int, optional + `itemsize` is the number of characters per scalar in the + resulting array. If `itemsize` is None, and `obj` is an + object array or a Python list, the `itemsize` will be + automatically determined. If `itemsize` is provided and `obj` + is of type `str` or `unicode`, then the `obj` string will be + chunked into `itemsize` pieces. + + unicode : bool, optional + When true, the resulting `chararray` can contain Unicode + characters, when false only 8-bit characters. 
If `unicode` is + `None` and `obj` is one of the following: + + - a `chararray`, + - an ndarray of type `string_` or 'unicode_` + - a Python str or unicode object, + + then the unicode setting of the output array will be + automatically determined. + + order : {'C', 'F'}, optional + Specify the order of the array. If order is 'C' (default), then the + array will be in C-contiguous order (last-index varies the + fastest). If order is 'F', then the returned array + will be in Fortran-contiguous order (first-index varies the + fastest). + """ return array(obj, itemsize, copy=False, unicode=unicode, order=order) diff --git a/numpy/core/src/multiarray/arraytypes.c.src b/numpy/core/src/multiarray/arraytypes.c.src index 1021f8d7f..9cff6836e 100644 --- a/numpy/core/src/multiarray/arraytypes.c.src +++ b/numpy/core/src/multiarray/arraytypes.c.src @@ -1333,7 +1333,7 @@ static void * longlong, ulonglong, float, double, longdouble, * cfloat, cdouble, clongdouble, char, char, char, datetime, * timedelta# - * #skip = 1*17, aip->descr->elsize*3, 1*2# + * #skip = 1*17, aop->descr->elsize*3, 1*2# */ static void OBJECT_to_@TOTYPE@(PyObject **ip, @totype@ *op, intp n, @@ -1389,7 +1389,10 @@ static void return; } } - @to@_setitem(temp,(char *)op, aop); + if (@to@_setitem(temp,(char *)op, aop)) { + Py_DECREF(temp); + return; + } Py_DECREF(temp); } } @@ -1417,7 +1420,10 @@ static void Py_INCREF(Py_False); temp = Py_False; } - @to@_setitem(temp,(char *)op, aop); + if (@to@_setitem(temp,(char *)op, aop)) { + Py_DECREF(temp); + return; + } Py_DECREF(temp); } } @@ -3146,7 +3152,7 @@ NPY_NO_EXPORT PyArray_Descr @from@_Descr = { /**end repeat**/ static void -_init_datetime_descr(PyArray_Descr *descr) +_init_datetime_descr(PyArray_Descr *descr) { PyArray_DatetimeMetaData *dt_data; PyObject *cobj; @@ -3156,12 +3162,12 @@ _init_datetime_descr(PyArray_Descr *descr) dt_data->num = 1; dt_data->den = 1; dt_data->events = 1; - + cobj = PyCObject_FromVoidPtr((void *)dt_data, _pya_free); descr->metadata 
= PyDict_New(); PyDict_SetItemString(descr->metadata, NPY_METADATA_DTSTR, cobj); Py_DECREF(cobj); - + } #define _MAX_LETTER 128 @@ -3251,7 +3257,7 @@ PyArray_DescrFromType(int type) _init_datetime_descr(ret); } } - + return ret; } diff --git a/numpy/core/src/multiarray/common.c b/numpy/core/src/multiarray/common.c index ae1d264f7..80b676a38 100644 --- a/numpy/core/src/multiarray/common.c +++ b/numpy/core/src/multiarray/common.c @@ -8,7 +8,7 @@ #include "npy_config.h" #include "usertypes.h" - + #include "common.h" /* @@ -66,6 +66,10 @@ _array_small_type(PyArray_Descr *chktype, PyArray_Descr* mintype) mintype->type_num == PyArray_STRING) { testsize = MAX(chksize, 4*minsize); } + else if (chktype->type_num == PyArray_STRING && + mintype->type_num == PyArray_UNICODE) { + testsize = MAX(chksize*4, minsize); + } else { testsize = MAX(chksize, minsize); } diff --git a/numpy/core/src/multiarray/multiarraymodule.c b/numpy/core/src/multiarray/multiarraymodule.c index 17c72aa6b..30def71e3 100644 --- a/numpy/core/src/multiarray/multiarraymodule.c +++ b/numpy/core/src/multiarray/multiarraymodule.c @@ -1796,7 +1796,7 @@ static PyObject *array_correlate(PyObject *NPY_UNUSED(dummy), PyObject *args, Py } static PyObject* -array_correlate2(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) +array_correlate2(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) { PyObject *shape, *a0; int mode = 0; @@ -1979,10 +1979,10 @@ array_set_datetimeparse_function(PyObject *NPY_UNUSED(dummy), PyObject *args, Py PyErr_SetString(PyExc_TypeError, "Argument must be callable."); return NULL; } - Py_INCREF(op); + Py_INCREF(op); } PyArray_SetDatetimeParseFunction(op); - Py_DECREF(op); + Py_DECREF(op); Py_INCREF(Py_None); return Py_None; } @@ -2316,6 +2316,221 @@ compare_chararrays(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) return NULL; } +static PyObject * +_vec_string_with_args(PyArrayObject* char_array, PyArray_Descr* type, + PyObject* method, PyObject* args) +{ + 
PyObject* broadcast_args[NPY_MAXARGS]; + PyArrayMultiIterObject* in_iter = NULL; + PyArrayObject* result = NULL; + PyArrayIterObject* out_iter = NULL; + PyObject* args_tuple = NULL; + Py_ssize_t i, n, nargs; + + nargs = PySequence_Size(args) + 1; + if (nargs == -1 || nargs > NPY_MAXARGS) { + PyErr_Format(PyExc_ValueError, "len(args) must be < %d", NPY_MAXARGS - 1); + goto err; + } + + broadcast_args[0] = (PyObject*)char_array; + for (i = 1; i < nargs; i++) { + PyObject* item = PySequence_GetItem(args, i-1); + if (item == NULL) { + goto err; + } + broadcast_args[i] = item; + Py_DECREF(item); + } + in_iter = (PyArrayMultiIterObject*)PyArray_MultiIterFromObjects + (broadcast_args, nargs, 0); + if (in_iter == NULL) { + goto err; + } + n = in_iter->numiter; + + result = (PyArrayObject*)PyArray_SimpleNewFromDescr + (in_iter->nd, in_iter->dimensions, type); + if (result == NULL) { + goto err; + } + + out_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)result); + if (out_iter == NULL) { + goto err; + } + + args_tuple = PyTuple_New(n); + if (args_tuple == NULL) { + goto err; + } + + while (PyArray_MultiIter_NOTDONE(in_iter)) { + for (i = 0; i < n; i++) { + PyArrayIterObject* it = in_iter->iters[i]; + PyObject* arg = PyArray_ToScalar(PyArray_ITER_DATA(it), it->ao); + if (arg == NULL) { + goto err; + } + PyTuple_SetItem(args_tuple, i, arg); /* Steals ref to arg */ + } + + PyObject* item_result = PyObject_CallObject(method, args_tuple); + if (item_result == NULL) { + goto err; + } + + if (PyArray_SETITEM(result, PyArray_ITER_DATA(out_iter), item_result)) { + Py_DECREF(item_result); + PyErr_SetString( + PyExc_TypeError, + "result array type does not match underlying function"); + goto err; + } + Py_DECREF(item_result); + + PyArray_MultiIter_NEXT(in_iter); + PyArray_ITER_NEXT(out_iter); + } + + Py_DECREF(in_iter); + Py_DECREF(out_iter); + Py_DECREF(args_tuple); + + return (PyObject*)result; + + err: + Py_XDECREF(in_iter); + Py_XDECREF(out_iter); + Py_XDECREF(args_tuple); + 
Py_XDECREF(result); + + return 0; +} + +static PyObject * +_vec_string_no_args(PyArrayObject* char_array, + PyArray_Descr* type, PyObject* method) +{ + /* This is a faster version of _vec_string_args to use when there + are no additional arguments to the string method. This doesn't + require a broadcast iterator (and broadcast iterators don't work + with 1 argument anyway). */ + PyArrayIterObject* in_iter = NULL; + PyArrayObject* result = NULL; + PyArrayIterObject* out_iter = NULL; + + in_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)char_array); + if (in_iter == NULL) { + goto err; + } + + result = (PyArrayObject*)PyArray_SimpleNewFromDescr + (PyArray_NDIM(char_array), PyArray_DIMS(char_array), type); + if (result == NULL) { + goto err; + } + + out_iter = (PyArrayIterObject*)PyArray_IterNew((PyObject*)result); + if (out_iter == NULL) { + goto err; + } + + while (PyArray_ITER_NOTDONE(in_iter)) { + PyObject* item = PyArray_ToScalar(in_iter->dataptr, in_iter->ao); + if (item == NULL) { + goto err; + } + + PyObject* item_result = PyObject_CallFunctionObjArgs(method, item, NULL); + Py_DECREF(item); + if (item_result == NULL) { + goto err; + } + + if (PyArray_SETITEM(result, PyArray_ITER_DATA(out_iter), item_result)) { + Py_DECREF(item_result); + PyErr_SetString( + PyExc_TypeError, + "result array type does not match underlying function"); + goto err; + } + Py_DECREF(item_result); + + PyArray_ITER_NEXT(in_iter); + PyArray_ITER_NEXT(out_iter); + } + + Py_DECREF(in_iter); + Py_DECREF(out_iter); + + return (PyObject*)result; + + err: + Py_XDECREF(in_iter); + Py_XDECREF(out_iter); + Py_XDECREF(result); + + return 0; +} + +static PyObject * +_vec_string(PyObject *NPY_UNUSED(dummy), PyObject *args, PyObject *kwds) +{ + PyArrayObject* char_array = NULL; + PyArray_Descr *type = NULL; + PyObject* method_name; + PyObject* args_seq = NULL; + + PyObject* method = NULL; + PyObject* result = NULL; + + if (!PyArg_ParseTuple(args, "O&O&O|O", + PyArray_Converter, + &char_array, 
+ PyArray_DescrConverter, + &type, + &method_name, + &args_seq)) { + goto err; + } + + if (PyArray_TYPE(char_array) == NPY_STRING) { + method = PyObject_GetAttr((PyObject *)&PyString_Type, method_name); + } else if (PyArray_TYPE(char_array) == NPY_UNICODE) { + method = PyObject_GetAttr((PyObject *)&PyUnicode_Type, method_name); + } else { + PyErr_SetString(PyExc_TypeError, "string operation on non-string array"); + goto err; + } + if (method == NULL) { + goto err; + } + + if (args_seq == NULL || + (PySequence_Check(args_seq) && PySequence_Size(args_seq) == 0)) { + result = _vec_string_no_args(char_array, type, method); + } else if (PySequence_Check(args_seq)) { + result = _vec_string_with_args(char_array, type, method, args_seq); + } else { + PyErr_SetString(PyExc_TypeError, "'args' must be a sequence of arguments"); + goto err; + } + if (result == NULL) { + goto err; + } + + Py_DECREF(char_array); + Py_DECREF(method); + + return (PyObject*)result; + + err: + Py_XDECREF(char_array); + Py_XDECREF(method); + + return 0; +} #ifndef __NPY_PRIVATE_NO_SIGNAL @@ -2397,7 +2612,7 @@ static struct PyMethodDef array_module_methods[] = { {"set_numeric_ops", (PyCFunction)array_set_ops_function, METH_VARARGS|METH_KEYWORDS, NULL}, - {"set_datetimeparse_function", + {"set_datetimeparse_function", (PyCFunction)array_set_datetimeparse_function, METH_VARARGS|METH_KEYWORDS, NULL}, {"set_typeDict", @@ -2475,6 +2690,9 @@ static struct PyMethodDef array_module_methods[] = { {"compare_chararrays", (PyCFunction)compare_chararrays, METH_VARARGS | METH_KEYWORDS, NULL}, + {"_vec_string", + (PyCFunction)_vec_string, + METH_VARARGS | METH_KEYWORDS, NULL}, {"test_interrupt", (PyCFunction)test_interrupt, METH_VARARGS, NULL}, @@ -2729,7 +2947,7 @@ PyMODINIT_FUNC initmultiarray(void) { if (PyErr_Occurred()) { goto err; } - + /* * PyExc_Exception should catch all the standard errors that are * now raised instead of the string exception "multiarray.error" diff --git 
a/numpy/core/tests/test_defchararray.py b/numpy/core/tests/test_defchararray.py index f5053edb6..723f7b598 100644 --- a/numpy/core/tests/test_defchararray.py +++ b/numpy/core/tests/test_defchararray.py @@ -1,15 +1,111 @@ from numpy.testing import * from numpy.core import * import numpy as np +import sys +from numpy.core.multiarray import _vec_string class TestBasic(TestCase): - def test_construction(self): - A = np.array([['abc', '123'], - ['789', 'xyz']]) - A1 = A.view(np.chararray) - A2 = np.chararray.__new__(np.chararray, A.shape, itemsize=A.itemsize, - buffer=A) - assert all(A1 == A2) + def test_from_object_array(self): + A = np.array([['abc', 2], + ['long ', '0123456789']], dtype='O') + B = np.char.array(A) + assert_equal(B.dtype.itemsize, 10) + assert_array_equal(B, [['abc', '2'], ['long', '0123456789']]) + + def test_from_object_array_unicode(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']], dtype='O') + self.failUnlessRaises(ValueError, np.char.array, (A,)) + B = np.char.array(A, unicode=True) + assert_equal(B.dtype.itemsize, 10 * np.array('a', 'U').dtype.itemsize) + assert_array_equal(B, [['abc', u'Sigma \u03a3'], ['long', '0123456789']]) + + def test_from_string_array(self): + A = np.array([['abc', 'foo'], + ['long ', '0123456789']]) + assert_equal(A.dtype.type, np.string_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B[0,0] = 'changed' + assert B[0,0] != A[0,0] + C = np.char.asarray(A) + assert_array_equal(C, A) + assert_equal(C.dtype, A.dtype) + C[0,0] = 'changed again' + assert C[0,0] != B[0,0] + assert C[0,0] == A[0,0] + + def test_from_unicode_array(self): + A = np.array([['abc', u'Sigma \u03a3'], + ['long ', '0123456789']]) + assert_equal(A.dtype.type, np.unicode_) + B = np.char.array(A) + assert_array_equal(B, A) + assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + B = np.char.array(A, unicode=True) + assert_array_equal(B, A) + 
assert_equal(B.dtype, A.dtype) + assert_equal(B.shape, A.shape) + def fail(): + B = np.char.array(A, unicode=False) + self.failUnlessRaises(UnicodeEncodeError, fail) + + def test_unicode_upconvert(self): + A = np.char.array(['abc']) + B = np.char.array([u'\u03a3']) + assert issubclass((A + B).dtype.type, np.unicode_) + + def test_from_string(self): + A = np.char.array('abc') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 3) + assert issubclass(A.dtype.type, np.string_) + + def test_from_unicode(self): + A = np.char.array(u'\u03a3') + assert_equal(len(A), 1) + assert_equal(len(A[0]), 1) + assert_equal(A.itemsize, 4) + assert issubclass(A.dtype.type, np.unicode_) + +class TestVecString(TestCase): + def test_non_existent_method(self): + def fail(): + _vec_string('a', np.string_, 'bogus') + self.failUnlessRaises(AttributeError, fail) + + def test_non_string_array(self): + def fail(): + _vec_string(1, np.string_, 'strip') + self.failUnlessRaises(TypeError, fail) + + def test_invalid_args_tuple(self): + def fail(): + _vec_string(['a'], np.string_, 'strip', 1) + self.failUnlessRaises(TypeError, fail) + + def test_invalid_type_descr(self): + def fail(): + _vec_string(['a'], 'BOGUS', 'strip') + self.failUnlessRaises(TypeError, fail) + + def test_invalid_function_args(self): + def fail(): + _vec_string(['a'], np.string_, 'strip', (1,)) + self.failUnlessRaises(TypeError, fail) + + def test_invalid_result_type(self): + def fail(): + _vec_string(['a'], np.integer, 'strip') + self.failUnlessRaises(TypeError, fail) + + def test_broadcast_error(self): + def fail(): + _vec_string([['abc', 'def']], np.integer, 'find', (['a', 'd', 'j'],)) + self.failUnlessRaises(ValueError, fail) class TestWhitespace(TestCase): @@ -21,15 +117,394 @@ class TestWhitespace(TestCase): def test1(self): assert all(self.A == self.B) - + assert all(self.A >= self.B) + assert all(self.A <= self.B) + assert all(negative(self.A > self.B)) + assert all(negative(self.A < self.B)) + assert all(negative(self.A 
!= self.B)) class TestChar(TestCase): def setUp(self): self.A = np.array('abc1', dtype='c').view(np.chararray) def test_it(self): - assert self.A.shape == (4,) - assert self.A.upper()[:2].tostring() == 'AB' + assert_equal(self.A.shape, (4,)) + assert_equal(self.A.upper()[:2].tostring(), 'AB') + +class TestComparisons(TestCase): + def setUp(self): + self.A = np.array([['abc', '123'], + ['789', 'xyz']]).view(np.chararray) + self.B = np.array([['efg', '123 '], + ['051', 'tuv']]).view(np.chararray) + + def test_not_equal(self): + assert_array_equal((self.A != self.B), [[True, False], [True, True]]) + + def test_equal(self): + assert_array_equal((self.A == self.B), [[False, True], [False, False]]) + + def test_greater_equal(self): + assert_array_equal((self.A >= self.B), [[False, True], [True, True]]) + + def test_less_equal(self): + assert_array_equal((self.A <= self.B), [[True, True], [False, False]]) + + def test_greater(self): + assert_array_equal((self.A > self.B), [[False, False], [True, True]]) + + def test_less(self): + assert_array_equal((self.A < self.B), [[True, False], [False, False]]) + +class TestInformation(TestCase): + def setUp(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_len(self): + assert issubclass(np.char.str_len(self.A).dtype.type, np.integer) + assert_array_equal(np.char.str_len(self.A), [[5, 0], [5, 9], [12, 5]]) + assert_array_equal(np.char.str_len(self.B), [[3, 0], [5, 9], [12, 5]]) + + def test_count(self): + assert issubclass(self.A.count('').dtype.type, np.integer) + assert_array_equal(self.A.count('a'), [[1, 0], [0, 1], [0, 0]]) + assert_array_equal(self.A.count('123'), [[0, 0], [1, 0], [1, 0]]) + # Python doesn't seem to like counting NULL characters + # assert_array_equal(self.A.count('\0'), [[0, 0], [0, 0], [1, 0]]) + 
assert_array_equal(self.A.count('a', 0, 2), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.B.count('a'), [[0, 0], [0, 1], [0, 0]]) + assert_array_equal(self.B.count('123'), [[0, 0], [1, 0], [1, 0]]) + # assert_array_equal(self.B.count('\0'), [[0, 0], [0, 0], [1, 0]]) + + def test_endswith(self): + assert issubclass(self.A.endswith('').dtype.type, np.bool_) + assert_array_equal(self.A.endswith(' '), [[1, 0], [0, 0], [1, 0]]) + assert_array_equal(self.A.endswith('3', 0, 3), [[0, 0], [1, 0], [1, 0]]) + def fail(): + self.A.endswith('3', 'fdjk') + self.failUnlessRaises(TypeError, fail) + + def test_find(self): + assert issubclass(self.A.find('a').dtype.type, np.integer) + assert_array_equal(self.A.find('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.find('3'), [[-1, -1], [2, -1], [2, -1]]) + assert_array_equal(self.A.find('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.find(['1', 'P']), [[-1, -1], [0, -1], [0, 1]]) + + def test_index(self): + def fail(): + self.A.index('a') + self.failUnlessRaises(ValueError, fail) + assert np.char.index('abcba', 'b') == 1 + assert issubclass(np.char.index('abcba', 'b').dtype.type, np.integer) + + def test_isalnum(self): + assert issubclass(self.A.isalnum().dtype.type, np.bool_) + assert_array_equal(self.A.isalnum(), [[False, False], [True, True], [False, True]]) + + def test_isalpha(self): + assert issubclass(self.A.isalpha().dtype.type, np.bool_) + assert_array_equal(self.A.isalpha(), [[False, False], [False, True], [False, True]]) + + def test_isdigit(self): + assert issubclass(self.A.isdigit().dtype.type, np.bool_) + assert_array_equal(self.A.isdigit(), [[False, False], [True, False], [False, False]]) + + def test_islower(self): + assert issubclass(self.A.islower().dtype.type, np.bool_) + assert_array_equal(self.A.islower(), [[True, False], [False, False], [False, False]]) + + def test_isspace(self): + assert issubclass(self.A.isspace().dtype.type, np.bool_) + 
assert_array_equal(self.A.isspace(), [[False, False], [False, False], [False, False]]) + + def test_istitle(self): + assert issubclass(self.A.istitle().dtype.type, np.bool_) + assert_array_equal(self.A.istitle(), [[False, False], [False, False], [False, False]]) + + def test_isupper(self): + assert issubclass(self.A.isupper().dtype.type, np.bool_) + assert_array_equal(self.A.isupper(), [[False, False], [False, False], [False, True]]) + + def test_rfind(self): + assert issubclass(self.A.rfind('a').dtype.type, np.integer) + assert_array_equal(self.A.rfind('a'), [[1, -1], [-1, 6], [-1, -1]]) + assert_array_equal(self.A.rfind('3'), [[-1, -1], [2, -1], [6, -1]]) + assert_array_equal(self.A.rfind('a', 0, 2), [[1, -1], [-1, -1], [-1, -1]]) + assert_array_equal(self.A.rfind(['1', 'P']), [[-1, -1], [0, -1], [0, 2]]) + + def test_rindex(self): + def fail(): + self.A.rindex('a') + self.failUnlessRaises(ValueError, fail) + assert np.char.rindex('abcba', 'b') == 3 + assert issubclass(np.char.rindex('abcba', 'b').dtype.type, np.integer) + + def test_startswith(self): + assert issubclass(self.A.startswith('').dtype.type, np.bool_) + assert_array_equal(self.A.startswith(' '), [[1, 0], [0, 0], [0, 0]]) + assert_array_equal(self.A.startswith('1', 0, 3), [[0, 0], [1, 0], [1, 0]]) + def fail(): + self.A.startswith('3', 'fdjk') + self.failUnlessRaises(TypeError, fail) + + +class TestMethods(TestCase): + def setUp(self): + self.A = np.array([[' abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]).view(np.chararray) + self.B = np.array([[u' \u03a3 ', u''], + [u'12345', u'MixedCase'], + [u'123 \t 345 \0 ', u'UPPER']]).view(np.chararray) + + def test_capitalize(self): + assert issubclass(self.A.capitalize().dtype.type, np.string_) + assert_array_equal(self.A.capitalize(), [ + [' abc ', ''], + ['12345', 'Mixedcase'], + ['123 \t 345 \0 ', 'Upper']]) + assert issubclass(self.B.capitalize().dtype.type, np.unicode_) + assert_array_equal(self.B.capitalize(), [ + [u' \u03c3 ', 
''], + ['12345', 'Mixedcase'], + ['123 \t 345 \0 ', 'Upper']]) + + def test_center(self): + assert issubclass(self.A.center(10).dtype.type, np.string_) + widths = np.array([[10, 20]]) + C = self.A.center([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + C = self.A.center(20, '#') + assert np.all(C.startswith('#')) + assert np.all(C.endswith('#')) + C = np.char.center('FOO', [[10, 20], [15, 8]]) + assert issubclass(C.dtype.type, np.string_) + assert_array_equal(C, [ + [' FOO ', ' FOO '], + [' FOO ', ' FOO ']]) + + def test_decode(self): + A = np.char.array(['736563726574206d657373616765']) + assert A.decode('hex_codec')[0] == 'secret message' + + def test_encode(self): + B = self.B.encode('unicode_escape') + print B[0][0] + assert B[0][0] == r' \u03a3 ' + + def test_expandtabs(self): + T = self.A.expandtabs() + print T + assert T[2][0] == '123 345' + + def test_join(self): + A = np.char.join([',', '#'], self.A) + assert issubclass(A.dtype.type, np.string_) + assert_array_equal(np.char.join([',', '#'], self.A), [ + [' ,a,b,c, ', ''], + ['1,2,3,4,5', 'M#i#x#e#d#C#a#s#e'], + ['1,2,3, ,\t, ,3,4,5, ,\x00, ', 'U#P#P#E#R']]) + + def test_ljust(self): + assert issubclass(self.A.ljust(10).dtype.type, np.string_) + widths = np.array([[10, 20]]) + C = self.A.ljust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + C = self.A.ljust(20, '#') + assert_array_equal(C.startswith('#'), [ + [False, True], [False, False], [False, False]]) + assert np.all(C.endswith('#')) + C = np.char.ljust('FOO', [[10, 20], [15, 8]]) + assert issubclass(C.dtype.type, np.string_) + assert_array_equal(C, [ + ['FOO ', 'FOO '], + ['FOO ', 'FOO ']]) + + def test_lower(self): + assert issubclass(self.A.lower().dtype.type, np.string_) + assert_array_equal(self.A.lower(), [ + [' abc ', ''], + ['12345', 'mixedcase'], + ['123 \t 345 \0 ', 'upper']]) + assert issubclass(self.B.lower().dtype.type, np.unicode_) + 
assert_array_equal(self.B.lower(), [ + [u' \u03c3 ', u''], + [u'12345', u'mixedcase'], + [u'123 \t 345 \0 ', u'upper']]) + + def test_lstrip(self): + assert issubclass(self.A.lstrip().dtype.type, np.string_) + assert_array_equal(self.A.lstrip(), [ + ['abc ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]) + assert_array_equal(self.A.lstrip(['1', 'M']), [ + [' abc', ''], + ['2345', 'ixedCase'], + ['23 \t 345 \x00', 'UPPER']]) + assert issubclass(self.B.lstrip().dtype.type, np.unicode_) + assert_array_equal(self.B.lstrip(), [ + [u'\u03a3 ', ''], + ['12345', 'MixedCase'], + ['123 \t 345 \0 ', 'UPPER']]) + + def test_partition(self): + if sys.version_info >= (2, 5): + P = self.A.partition(['3', 'M']) + assert issubclass(P.dtype.type, np.string_) + assert_array_equal(P, [ + [(' abc ', '', ''), ('', '', '')], + [('12', '3', '45'), ('', 'M', 'ixedCase')], + [('12', '3', ' \t 345 \0 '), ('UPPER', '', '')]]) + + def test_replace(self): + R = self.A.replace(['3', 'a'], ['##########', '@']) + assert issubclass(R.dtype.type, np.string_) + assert_array_equal(R, [ + [' abc ', ''], + ['12##########45', 'MixedC@se'], + ['12########## \t ##########45 \x00', 'UPPER']]) + R = self.A.replace('a', u'\u03a3') + assert issubclass(R.dtype.type, np.unicode_) + assert_array_equal(R, [ + [u' \u03a3bc ', ''], + ['12345', u'MixedC\u03a3se'], + ['123 \t 345 \x00', 'UPPER']]) + + def test_rjust(self): + assert issubclass(self.A.rjust(10).dtype.type, np.string_) + widths = np.array([[10, 20]]) + C = self.A.rjust([10, 20]) + assert_array_equal(np.char.str_len(C), [[10, 20], [10, 20], [12, 20]]) + C = self.A.rjust(20, '#') + assert np.all(C.startswith('#')) + assert_array_equal(C.endswith('#'), [[False, True], [False, False], [False, False]]) + C = np.char.rjust('FOO', [[10, 20], [15, 8]]) + assert issubclass(C.dtype.type, np.string_) + assert_array_equal(C, [ + [' FOO', ' FOO'], + [' FOO', ' FOO']]) + + def test_rpartition(self): + if sys.version_info >= (2, 5): + P = 
self.A.rpartition(['3', 'M']) + assert issubclass(P.dtype.type, np.string_) + assert_array_equal(P, [ + [('', '', ' abc '), ('', '', '')], + [('12', '3', '45'), ('', 'M', 'ixedCase')], + [('123 \t ', '3', '45 \0 '), ('', '', 'UPPER')]]) + + def test_rsplit(self): + A = self.A.rsplit('3') + assert issubclass(A.dtype.type, np.object_) + assert_equal(A.tolist(), [ + [[' abc '], ['']], + [['12', '45'], ['MixedCase']], + [['12', ' \t ', '45 \x00 '], ['UPPER']]]) + + def test_rstrip(self): + assert issubclass(self.A.rstrip().dtype.type, np.string_) + assert_array_equal(self.A.rstrip(), [ + [' abc', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']]) + assert_array_equal(self.A.rstrip(['5', 'ER']), [ + [' abc ', ''], + ['1234', 'MixedCase'], + ['123 \t 345 \x00', 'UPP']]) + assert issubclass(self.B.rstrip().dtype.type, np.unicode_) + assert_array_equal(self.B.rstrip(), [ + [u' \u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']]) + + def test_strip(self): + assert issubclass(self.A.strip().dtype.type, np.string_) + assert_array_equal(self.A.strip(), [ + ['abc', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']]) + assert_array_equal(self.A.strip(['15', 'EReM']), [ + [' abc ', ''], + ['234', 'ixedCas'], + ['23 \t 345 \x00', 'UPP']]) + assert issubclass(self.B.strip().dtype.type, np.unicode_) + assert_array_equal(self.B.strip(), [ + [u'\u03a3', ''], + ['12345', 'MixedCase'], + ['123 \t 345', 'UPPER']]) + + def test_split(self): + A = self.A.split('3') + assert issubclass(A.dtype.type, np.object_) + assert_equal(A.tolist(), [ + [[' abc '], ['']], + [['12', '45'], ['MixedCase']], + [['12', ' \t ', '45 \x00 '], ['UPPER']]]) + + def test_splitlines(self): + A = np.char.array(['abc\nfds\nwer']).splitlines() + assert issubclass(A.dtype.type, np.object_) + assert A.shape == (1,) + assert len(A[0]) == 3 + + def test_swapcase(self): + assert issubclass(self.A.swapcase().dtype.type, np.string_) + assert_array_equal(self.A.swapcase(), [ + [' ABC ', ''], + 
['12345', 'mIXEDcASE'], + ['123 \t 345 \0 ', 'upper']]) + assert issubclass(self.B.swapcase().dtype.type, np.unicode_) + assert_array_equal(self.B.swapcase(), [ + [u' \u03c3 ', u''], + [u'12345', u'mIXEDcASE'], + [u'123 \t 345 \0 ', u'upper']]) + + def test_title(self): + assert issubclass(self.A.title().dtype.type, np.string_) + assert_array_equal(self.A.title(), [ + [' Abc ', ''], + ['12345', 'Mixedcase'], + ['123 \t 345 \0 ', 'Upper']]) + assert issubclass(self.B.title().dtype.type, np.unicode_) + assert_array_equal(self.B.title(), [ + [u' \u03a3 ', u''], + [u'12345', u'Mixedcase'], + [u'123 \t 345 \0 ', u'Upper']]) + + def test_upper(self): + assert issubclass(self.A.upper().dtype.type, np.string_) + assert_array_equal(self.A.upper(), [ + [' ABC ', ''], + ['12345', 'MIXEDCASE'], + ['123 \t 345 \0 ', 'UPPER']]) + assert issubclass(self.B.upper().dtype.type, np.unicode_) + assert_array_equal(self.B.upper(), [ + [u' \u03a3 ', u''], + [u'12345', u'MIXEDCASE'], + [u'123 \t 345 \0 ', u'UPPER']]) + + def test_isnumeric(self): + def fail(): + self.A.isnumeric() + self.failUnlessRaises(TypeError, fail) + assert issubclass(self.B.isnumeric().dtype.type, np.bool_) + assert_array_equal(self.B.isnumeric(), [ + [False, False], [True, False], [False, False]]) + + def test_isdecimal(self): + def fail(): + self.A.isdecimal() + self.failUnlessRaises(TypeError, fail) + assert issubclass(self.B.isdecimal().dtype.type, np.bool_) + assert_array_equal(self.B.isdecimal(), [ + [False, False], [True, False], [False, False]]) class TestOperations(TestCase): @@ -42,20 +517,21 @@ class TestOperations(TestCase): def test_add(self): AB = np.array([['abcefg', '123456'], ['789051', 'xyztuv']]).view(np.chararray) - assert all(AB == (self.A + self.B)) + assert_array_equal(AB, (self.A + self.B)) + assert len((self.A + self.B)[0][0]) == 6 def test_radd(self): QA = np.array([['qabc', 'q123'], ['q789', 'qxyz']]).view(np.chararray) - assert all(QA == ('q' + self.A)) + assert_array_equal(QA, ('q' + 
self.A)) def test_mul(self): A = self.A -# for r in (2,3,5,7,197): -# Ar = np.array([[A[0,0]*r, A[0,1]*r], -# [A[1,0]*r, A[1,1]*r]]).view(np.chararray) -# -# assert all(Ar == (self.A * r)) + for r in (2,3,5,7,197): + Ar = np.array([[A[0,0]*r, A[0,1]*r], + [A[1,0]*r, A[1,1]*r]]).view(np.chararray) + + assert_array_equal(Ar, (self.A * r)) for ob in [object(), 'qrs']: try: @@ -67,11 +543,10 @@ class TestOperations(TestCase): def test_rmul(self): A = self.A -# for r in (2,3,5,7,197): -# Ar = np.array([[A[0,0]*r, A[0,1]*r], -# [A[1,0]*r, A[1,1]*r]]).view(np.chararray) -# -# assert all(Ar == (r * self.A)) + for r in (2,3,5,7,197): + Ar = np.array([[A[0,0]*r, A[0,1]*r], + [A[1,0]*r, A[1,1]*r]]).view(np.chararray) + assert_array_equal(Ar, (r * self.A)) for ob in [object(), 'qrs']: try: @@ -82,7 +557,19 @@ class TestOperations(TestCase): self.fail("chararray can only be multiplied by integers") def test_mod(self): - pass + # From Alan McIntyre's bug #856 + F = np.array([['%d', '%f'],['%s','%r']]).view(np.chararray) + C = np.array([[3,7],[19,1]]) + FC = np.array([['3', '7.000000'], + ['19', '1']]).view(np.chararray) + assert_array_equal(FC, F % C) + + A = np.array([['%.3f','%d'],['%s','%r']]).view(np.chararray) + A1 = np.array([['1.000','1'],['1','1']]).view(np.chararray) + assert_array_equal(A1, (A % 1)) + + A2 = np.array([['1.000','2'],['3','4']]).view(np.chararray) + assert_array_equal(A2, (A % [[1,2],[3,4]])) def test_rmod(self): assert ("%s" % self.A) == str(self.A) @@ -98,5 +585,6 @@ class TestOperations(TestCase): "non-string objects") + if __name__ == "__main__": run_module_suite() diff --git a/numpy/core/tests/test_regression.py b/numpy/core/tests/test_regression.py index f638558d1..01f9ea58e 100644 --- a/numpy/core/tests/test_regression.py +++ b/numpy/core/tests/test_regression.py @@ -1118,5 +1118,37 @@ class TestRegression(TestCase): i = np.lexsort((a[::-1], b)) assert_equal(i, np.arange(100, dtype=np.int)) + def test_object_array_to_fixed_string(self): + 
"""Ticket #1235.""" + a = np.array(['abcdefgh', 'ijklmnop'], dtype=np.object_) + b = np.array(a, dtype=(np.string_, 8)) + assert_equal(a, b) + c = np.array(a, dtype=(np.string_, 5)) + assert_equal(c, np.array(['abcde', 'ijklm'])) + d = np.array(a, dtype=(np.string_, 12)) + assert_equal(a, d) + e = np.empty((2, ), dtype=(np.string_, 8)) + e[:] = a[:] + assert_equal(a, e) + + def test_unicode_to_string_cast(self): + """Ticket #1240.""" + a = np.array([[u'abc', u'\u03a3'], [u'asdf', u'erw']], dtype='U') + def fail(): + b = np.array(a, 'S4') + self.failUnlessRaises(UnicodeEncodeError, fail) + + def test_mixed_string_unicode_array_creation(self): + a = np.array(['1234', u'123']) + assert a.itemsize == 16 + a = np.array([u'123', '1234']) + assert a.itemsize == 16 + a = np.array(['1234', u'123', '12345']) + assert a.itemsize == 20 + a = np.array([u'123', '1234', u'12345']) + assert a.itemsize == 20 + a = np.array([u'123', '1234', u'1234']) + assert a.itemsize == 16 + if __name__ == "__main__": run_module_suite() |