summaryrefslogtreecommitdiff
path: root/numpy/core/defchararray.py
diff options
context:
space:
mode:
authorPauli Virtanen <pav@iki.fi>2009-10-02 19:36:37 +0000
committerPauli Virtanen <pav@iki.fi>2009-10-02 19:36:37 +0000
commit094a81e1352fb6b3a7a614fc8df23d0080cf7cb5 (patch)
tree0ec0b833b714ccf87085a43de363287edec5c7ee /numpy/core/defchararray.py
parentd8f0614a886488b875e34555c590e6469e24102a (diff)
downloadnumpy-094a81e1352fb6b3a7a614fc8df23d0080cf7cb5.tar.gz
Docstring update: core
Diffstat (limited to 'numpy/core/defchararray.py')
-rw-r--r--numpy/core/defchararray.py469
1 files changed, 461 insertions, 8 deletions
diff --git a/numpy/core/defchararray.py b/numpy/core/defchararray.py
index 9866954c1..580bde59a 100644
--- a/numpy/core/defchararray.py
+++ b/numpy/core/defchararray.py
@@ -1,3 +1,14 @@
+"""
+Module for character arrays.
+
+.. note::
+ The chararray module exists for backwards compatibility with Numarray,
+ it is not recommended for new development. If one needs arrays of
+ strings, use arrays of `dtype` object.
+
+The preferred alias for `defchararray` is `numpy.char`.
+
+"""
import sys
from numerictypes import string_, unicode_, integer, object_
from numeric import ndarray, broadcast, empty, compare_chararrays
@@ -20,14 +31,62 @@ class chararray(ndarray):
chararray(shape, itemsize=1, unicode=False, buffer=None, offset=0,
strides=None, order=None)
- A character array of string or unicode type.
-
- The array items will be `itemsize` characters long.
-
- Create the array using buffer (with offset and strides) if it is
- not None. If buffer is None, then construct a new array with strides
- in Fortran order if len(shape) >=2 and order is 'Fortran' (otherwise
- the strides will be in 'C' order).
+ An array of fixed size (perhaps unicode) strings.
+
+ .. note::
+ The chararray module exists for backwards compatibility with Numarray,
+ it is not recommended for new development. If one needs arrays of
+ strings, use arrays of `dtype` object.
+
+ Create the array, using `buffer` (with `offset` and `strides`) if it is
+ not ``None``. If `buffer` is ``None``, then construct a new array with
+ `strides` in "C order," unless both ``len(shape) >= 2`` and
+ ``order='Fortran'``, in which case `strides` is in "Fortran order."
+
+ Parameters
+ ----------
+ shape : tuple
+ Shape of the array.
+
+ itemsize : int_like, > 0, optional
+ Length of each array element, in number of characters. Default is 1.
+
+ unicode : {True, False}, optional
+ Are the array elements of unicode-type (``True``) or string-type
+ (``False``, the default).
+
+ buffer : object exposing the buffer interface or str, optional
+ Object whose memory is used for the array data. If ``None`` (the
+ default), a new array is created.
+
+ offset : integer, >= 0, optional
+ Offset, in bytes, of the array data from the start of `buffer`.
+ Default is 0.
+
+ strides : array_like of ints, optional
+ Strides for the array (see `numpy.ndarray.strides` for full
+ description), default is ``None``.
+
+ order : {'C', 'F'}, optional
+ The order in which the array data is stored in memory: 'C' -> "row
+ major" order (the default), 'F' -> "column major" (Fortran) order
+
+ Examples
+ --------
+ >>> charar = np.chararray((3, 3))
+ >>> charar[:,:] = 'abc'
+ >>> charar
+ chararray([['a', 'a', 'a'],
+ ['a', 'a', 'a'],
+ ['a', 'a', 'a']],
+ dtype='|S1')
+ >>> charar = np.chararray(charar.shape, itemsize=5)
+ >>> charar[:,:] = 'abc'
+ >>> charar
+ chararray([['abc', 'abc', 'abc'],
+ ['abc', 'abc', 'abc'],
+ ['abc', 'abc', 'abc']],
+ dtype='|S5')
"""
def __new__(subtype, shape, itemsize=1, unicode=False, buffer=None,
@@ -144,6 +203,23 @@ class chararray(ndarray):
return NotImplemented
def argsort(self, axis=-1, kind='quicksort', order=None):
+ """
+ Return the indices that sort the array lexicographically.
+
+ For full documentation see `numpy.argsort`, for which this method is
+ in fact merely a "thin wrapper."
+
+ Examples
+ --------
+ >>> c = np.array(['a1b c', '1b ca', 'b ca1', 'Ca1b'], 'S5')
+ >>> c = c.view(np.chararray); c
+ chararray(['a1b c', '1b ca', 'b ca1', 'Ca1b'],
+ dtype='|S5')
+ >>> c[c.argsort()]
+ chararray(['1b ca', 'Ca1b', 'a1b c', 'b ca1'],
+ dtype='|S5')
+
+ """
return self.__array__().argsort(axis, kind, order)
def _generalmethod(self, name, myiter):
@@ -184,6 +260,33 @@ class chararray(ndarray):
return result
def capitalize(self):
+ """
+ Capitalize the first character of each array element.
+
+ For each element of `self`, if the first character is a letter
+ possessing both "upper-case" and "lower-case" forms, and it is
+ presently in lower-case, change it to upper-case; otherwise, leave
+ it untouched.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ ret : chararray
+ `self` with the first character of each element capitalized.
+
+ Examples
+ --------
+ >>> c = np.array(['a1b2','1b2a','b2a1','2a1b'],'S4').view(np.chararray); c
+ chararray(['a1b2', '1b2a', 'b2a1', '2a1b'],
+ dtype='|S4')
+ >>> c.capitalize()
+ chararray(['A1b2', '1b2a', 'B2a1', '2a1b'],
+ dtype='|S4')
+
+ """
return self._samemethod('capitalize')
if sys.version[:3] >= '2.4':
@@ -208,15 +311,161 @@ class chararray(ndarray):
return self._generalmethod('center', broadcast(self, width))
def count(self, sub, start=None, end=None):
+ """
+ Return the number of occurrences of a sub-string in each array element.
+
+ Parameters
+ ----------
+ sub : string
+ The sub-string to count.
+ start : int, optional
+ The string index at which to start counting in each element.
+ end : int, optional
+ The string index at which to end counting in each element.
+
+ Returns
+ -------
+ ret : ndarray of ints
+ Array whose elements are the number of occurrences of `sub` in each
+ element of `self`.
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
+ >>> c
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> c.count('A')
+ array([3, 1, 1])
+ >>> c.count('aA')
+ array([3, 1, 0])
+ >>> c.count('A', start=1, end=4)
+ array([2, 1, 1])
+ >>> c.count('A', start=1, end=3)
+ array([1, 0, 0])
+
+ """
return self._typedmethod('count', broadcast(self, sub, start, end), int)
def decode(self,encoding=None,errors=None):
+ """
+ Return elements decoded according to the value of `encoding`.
+
+ Parameters
+ ----------
+ encoding : string, optional
+ The encoding to use; for a list of acceptable values, see the
+ Python docstring for the package 'encodings'
+ errors : string, optional
+ The error handling scheme to use if decoding fails, e.g. 'strict',
+ 'ignore' or 'replace'.
+
+ Returns
+ -------
+ ret : chararray
+ A view of `self`, suitably decoded.
+
+ See Also
+ --------
+ encode
+ encodings
+ (package)
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
+ >>> c
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> c = c.encode(encoding='cp037'); c
+ chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
+ '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
+ dtype='|S7')
+ >>> c.decode(encoding='cp037')
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+
+ """
return self._generalmethod('decode', broadcast(self, encoding, errors))
def encode(self,encoding=None,errors=None):
+ """
+ Return elements encoded according to the value of `encoding`.
+
+ Parameters
+ ----------
+ encoding : string, optional
+ The encoding to use; for a list of acceptable values, see the
+ Python docstring for `encodings`.
+ errors : string, optional
+ The error handling scheme to use if encoding fails, e.g. 'strict',
+ 'ignore' or 'replace'.
+
+ Returns
+ -------
+ ret : chararray
+ A view of `self`, suitably encoded.
+
+ See Also
+ --------
+ decode
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
+ >>> c
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> c.encode(encoding='cp037')
+ chararray(['\\x81\\xc1\\x81\\xc1\\x81\\xc1', '@@\\x81\\xc1@@',
+ '\\x81\\x82\\xc2\\xc1\\xc2\\x82\\x81'],
+ dtype='|S7')
+
+ """
return self._generalmethod('encode', broadcast(self, encoding, errors))
def endswith(self, suffix, start=None, end=None):
+ """
+ Check whether elements end with specified suffix.
+
+ Given an array of strings, return a new bool array of same shape with
+ the result of comparing suffix against each element; each element
+ of bool array is ``True`` if element ends with specified suffix and
+ ``False`` otherwise.
+
+ Parameters
+ ----------
+ suffix : string
+ Compare each element in array to this.
+ start : int, optional
+ For each element, start searching from this position.
+ end : int, optional
+ For each element, stop comparing at this position.
+
+ Returns
+ -------
+ endswith : ndarray
+ Output array of bools
+
+ See Also
+ --------
+ count
+ find
+ index
+ startswith
+
+ Examples
+ --------
+ >>> s = np.chararray(2, itemsize=3)
+ >>> s[0] = 'foo'
+ >>> s[1] = 'bar'
+ >>> s
+ chararray(['foo', 'bar'],
+ dtype='|S3')
+ >>> s.endswith('ar')
+ array([False, True], dtype=bool)
+ >>> s.endswith('a', start=1, end=2)
+ array([False, True], dtype=bool)
+
+ """
return self._typedmethod('endswith', broadcast(self, suffix, start, end), bool)
def expandtabs(self, tabsize=None):
@@ -261,9 +510,76 @@ class chararray(ndarray):
return self._generalmethod('join', broadcast(self, seq))
def lower(self):
+ """
+ Assure that every character of each array element is lower-case.
+
+ For each character possessing both "upper-case" and "lower-case" forms,
+ if it is in upper-case, change it to lower; otherwise, leave it unchanged.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ ret : chararray
+ `self` with all capital letters changed to lower-case.
+
+ Examples
+ --------
+ >>> c = np.array(['A1B C', '1BCA', 'BCA1']).view(np.chararray); c
+ chararray(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+ >>> c.lower()
+ chararray(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+
+ """
return self._samemethod('lower')
def lstrip(self, chars):
+ """
+ Remove leading characters from each element.
+
+ Returns a view of ``self`` with `chars` stripped from the start of
+ each element. Note: **No Default** - `chars` must be specified (but if
+ it is explicitly ``None`` or the empty string '', leading whitespace is
+ removed).
+
+ Parameters
+ ----------
+ chars : string_like or None
+ Character(s) to strip; whitespace stripped if `chars` == ``None``
+ or `chars` == ''.
+
+ Returns
+ -------
+ ret : chararray
+ View of ``self``, each element suitably stripped.
+
+ Raises
+ ------
+ TypeError: lstrip() takes exactly 2 arguments (1 given)
+ If `chars` is not supplied.
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
+ >>> c
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> c.lstrip('a') # 'a' unstripped from c[1] because whitespace leading
+ chararray(['AaAaA', ' aA', 'bBABba'],
+ dtype='|S6')
+ >>> c.lstrip('A') # leaves c unchanged
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> (c.lstrip(' ') == c.lstrip('')).all()
+ True
+ >>> (c.lstrip(' ') == c.lstrip(None)).all()
+ True
+
+ """
return self._generalmethod('lstrip', broadcast(self, chars))
def replace(self, old, new, count=None):
@@ -276,6 +592,35 @@ class chararray(ndarray):
return self._typedmethod('rindex', broadcast(self, sub, start, end), int)
def rstrip(self, chars=None):
+ """
+ Remove trailing characters.
+
+ Returns a view of ``self`` with `chars` stripped from the end of each
+ element.
+
+ Parameters
+ ----------
+ chars : string_like, optional, default=None
+ Character(s) to remove.
+
+ Returns
+ -------
+ ret : chararray
+ View of ``self``, each element suitably stripped.
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', 'abBABba'], dtype='S7').view(np.chararray); c
+ chararray(['aAaAaA', 'abBABba'],
+ dtype='|S7')
+ >>> c.rstrip('a')
+ chararray(['aAaAaA', 'abBABb'],
+ dtype='|S6')
+ >>> c.rstrip('A')
+ chararray(['aAaAa', 'abBABba'],
+ dtype='|S7')
+
+ """
return self._generalmethod('rstrip', broadcast(self, chars))
def split(self, sep=None, maxsplit=None):
@@ -288,12 +633,95 @@ class chararray(ndarray):
return self._typedmethod('startswith', broadcast(self, prefix, start, end), bool)
def strip(self, chars=None):
+ """
+ Remove leading and trailing characters, whitespace by default.
+
+ Returns a view of ``self`` with `chars` stripped from the start and end of
+ each element; by default leading and trailing whitespace is removed.
+
+ Parameters
+ ----------
+ chars : string_like, optional, default=None
+ Character(s) to strip; whitespace by default.
+
+ Returns
+ -------
+ ret : chararray
+ View of ``self``, each element suitably stripped.
+
+ Examples
+ --------
+ >>> c = np.array(['aAaAaA', ' aA ', 'abBABba']).view(np.chararray)
+ >>> c
+ chararray(['aAaAaA', ' aA', 'abBABba'],
+ dtype='|S7')
+ >>> c.strip()
+ chararray(['aAaAaA', 'aA', 'abBABba'],
+ dtype='|S7')
+ >>> c.strip('a') # 'a' unstripped from c[1] because whitespace leads
+ chararray(['AaAaA', ' aA', 'bBABb'],
+ dtype='|S6')
+ >>> c.strip('A') # 'A' unstripped from c[1] because (unprinted) ws trails
+ chararray(['aAaAa', ' aA', 'abBABba'],
+ dtype='|S7')
+
+ """
return self._generalmethod('strip', broadcast(self, chars))
def swapcase(self):
+ """
+ Switch upper-case letters to lower-case, and vice-versa.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ ret : chararray
+ `self` with all lower-case letters capitalized and all upper-case
+ changed to lower case.
+
+ Examples
+ --------
+ >>> c=np.array(['a1B c','1b Ca','b Ca1','cA1b'],'S5').view(np.chararray);c
+ chararray(['a1B c', '1b Ca', 'b Ca1', 'cA1b'],
+ dtype='|S5')
+ >>> c.swapcase()
+ chararray(['A1b C', '1B cA', 'B cA1', 'Ca1B'],
+ dtype='|S5')
+
+ """
return self._samemethod('swapcase')
def title(self):
+ """
+ Return a title-cased version of each array element.
+
+ For each element of `self`, the first letter of every word is changed
+ to upper-case and all remaining letters are changed to lower-case. A
+ word is any run of letters, so characters such as digits and
+ whitespace act as word boundaries.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ ret : chararray
+ `self` with each element title-cased.
+
+ Examples
+ --------
+ >>> c=np.array(['a1b c','1b ca','b ca1','ca1b'],'S5').view(np.chararray);c
+ chararray(['a1b c', '1b ca', 'b ca1', 'ca1b'],
+ dtype='|S5')
+ >>> c.title()
+ chararray(['A1B C', '1B Ca', 'B Ca1', 'Ca1B'],
+ dtype='|S5')
+
+ """
return self._samemethod('title')
def translate(self, table, deletechars=None):
@@ -303,6 +731,31 @@ class chararray(ndarray):
return self._generalmethod('translate', broadcast(self, table, deletechars))
def upper(self):
+ """
+ Capitalize every character of each array element.
+
+ For each character possessing both "upper-case" and "lower-case" forms,
+ if it is in lower-case, change it to upper; otherwise, leave it unchanged.
+
+ Parameters
+ ----------
+ None
+
+ Returns
+ -------
+ ret : chararray
+ `self` with all characters capitalized.
+
+ Examples
+ --------
+ >>> c = np.array(['a1b c', '1bca', 'bca1']).view(np.chararray); c
+ chararray(['a1b c', '1bca', 'bca1'],
+ dtype='|S5')
+ >>> c.upper()
+ chararray(['A1B C', '1BCA', 'BCA1'],
+ dtype='|S5')
+
+ """
return self._samemethod('upper')
def zfill(self, width):