Altered flexible types so that NotImplemented is raised in case another type wants to handle it.

author: Travis Oliphant <oliphant@enthought.com> 2005-11-02 23:54:48 +0000
committer: Travis Oliphant <oliphant@enthought.com> 2005-11-02 23:54:48 +0000
commit: e484db1b4e730d58a395381aa1b4a98dee834f59 (patch)
tree: baab5dff644bfa65690aeeb776630a7fba306e68 /scipy/base/chararray.py
parent: 0e488269b5feebe4fb47380fdf19a21da95b1b70 (diff)
download: numpy-e484db1b4e730d58a395381aa1b4a98dee834f59.tar.gz
1 files changed, 1234 insertions, 0 deletions
diff --git a/scipy/base/chararray.py b/scipy/base/chararray.py
new file mode 100644
index 000000000..93210a797
--- /dev/null
+++ b/scipy/base/chararray.py
@@ -0,0 +1,1234 @@
+# TODO:  This needs to be
+#        fleshed out
+#  Adapted from Numarray by J. Todd Miller
+"""
+Large chararray test
+>>> xx=array(None,itemsize=3,shape=220000)
+
+>>> import cPickle
+>>> c=cPickle.loads(cPickle.dumps(fromlist(["this","that","something else"])))
+>>> c
+CharArray(['this', 'that', 'something else'])
+>>> c._type
+CharArrayType(14)
+
+>>> a=fromlist(["this"]*25); a.shape=(5,5); a[ range(2,4) ]
+CharArray([['this', 'this', 'this', 'this', 'this'],
+           ['this', 'this', 'this', 'this', 'this']])
+>>> a[ range(2,4) ] = fromlist(["that"]); a
+CharArray([['this', 'this', 'this', 'this', 'this'],
+           ['this', 'this', 'this', 'this', 'this'],
+           ['that', 'that', 'that', 'that', 'that'],
+           ['that', 'that', 'that', 'that', 'that'],
+           ['this', 'this', 'this', 'this', 'this']])
+
+>>> array([], shape=(0,1,2))
+CharArray([])
+
+>>> a = _gen.concatenate([array(["1"]*3), array(["2"]*3)]); a
+CharArray(['1', '1', '1', '2', '2', '2'])
+>>> _gen.reshape(a, (2,3))
+CharArray([['1', '1', '1'],
+           ['2', '2', '2']])
+
+>>> CharArray(buffer="thatthis", shape=(2,), itemsize=4,
+...              bytestride=-4, byteoffset=4)
+CharArray(['this', 'that'])
+"""
+
+from copy import _EmptyClass
+import memory
+import _bytes
+import _chararray
+import numarrayall as _na
+import generic as _gen
+import sys
+import string
+import types
+import os
+import re
+import arrayprint
+import warnings
+import numerictypes as _nt
+import numinclude as _numinclude
+import _ndarray as _nda
+
+_globals, _locals  = globals, locals
+
+class CharArrayType:
+    def __init__(self, itemsize):
+        self.itemsize = itemsize
+        self.name = "CharArrayType(%d)" % (self.itemsize,)
+
+    def __repr__(self):
+        return self.name
+
+CharArrayTypeCache = {}
+
+def NewCharArrayType(itemsize):
+    """NewCharArrayType creates CharArrayTypes on demand, but checks to see
+    if they already exist in the cache first.  This makes type equivalence
+    the same as object identity.
+    """
+    if itemsize not in CharArrayTypeCache.keys():
+        CharArrayTypeCache[itemsize] = CharArrayType(itemsize)
+    return CharArrayTypeCache[itemsize]
+
+
+class PrecisionWarning(UserWarning):
+    pass
+
+
+class RawCharArray(_na.NDArray):
+    """RawCharArray(buffer=None, shape=None, byteoffset=0, bytestride=None)
+
+      RawCharArray is a fixed length array of characters based on
+      ndarray.NDArray with no automatic stripping or padding.
+
+      itemsize specifies the length of all strings in the array.
+    """
+    _is_raw = 1
+    def __init__(self, buffer=None, itemsize=None, shape=None, byteoffset=0,
+                 bytestride=None, aligned=1, type=None, padc=" "):
+
+        if isinstance(shape, types.IntType):
+            shape = (shape,)
+
+        if type is not None:
+            if itemsize is not None:
+                raise ValueError("Specify type *or* itemsize, not both.")
+            itemsize = type.itemsize
+
+        if not (padc, types.StringType) or len(padc) <> 1:
+            raise ValueError("padc must be a string of length 1.")
+
+        if buffer is None:
+            if shape is None or itemsize is None:
+                raise ValueError("Must define both shape & itemsize if buffer is None")
+        else:
+            if shape is None and itemsize is None:
+                raise ValueError("Must specify shape, itemsize, or both.")
+            ni = _bufferItems( buffer, byteoffset, bytestride, itemsize)
+            if shape and itemsize == None:
+                itemsize = ni/_na.product(shape)
+            if itemsize and shape == None:
+                shape = (ni,)
+            if itemsize == 0:  # Another hack for 0 length strings.
+                bytestride=0
+
+        if not _nda.is_buffer(buffer) and buffer is not None:
+            raise TypeError("buffer must either support the C buffer protocol or return something that does from its __buffer__() method");
+        
+        _na.NDArray.__init__(self, shape=shape, itemsize=itemsize,
+                                 buffer=buffer, byteoffset=byteoffset,
+                                 bytestride=bytestride, aligned=aligned)
+        
+        self._flags |= _gen._UPDATEDICT
+
+        if type is None:
+            type = NewCharArrayType(itemsize)
+
+        self._type = type
+        self._padc = padc
+        
+        if buffer is None:
+            self.fill(" ")
+
+    def __get_array_typestr__(self):
+        return '|S%d' % self._itemsize
+
+    __array_typestr__ = property(__get_array_typestr__, None, "")
+
+    def tolist(self):
+        """returns array as a (nested) list of strings."""
+        if len(self._shape) == 1:
+            if self._shape[0] > 0:
+                return [ x for x in self ]
+            else:
+                return []
+        else:
+            return [ ni.tolist() for ni in self ]
+
+    def __getstate__(self):
+        """returns RawCharArray state dictionary for pickling"""
+        state = _na.NDArray.__getstate__(self)
+        state["_type"] = self._type.itemsize
+        return state
+
+    def __setstate__(self, state):
+        """sets RawCharArray dictionary from pickled state"""
+        _na.NDArray.__setstate__(self, state)
+        self._type = NewCharArrayType(state["_type"])
+
+    def isbyteswapped(self):
+        """CharArray's are *never* byteswapped.  returns 0."""
+        return 0
+
+    def isaligned(self):
+        """CharArray's are *always* aligned.  returns 1."""
+        return 1
+
+    def sinfo(self):
+        "returns string describing a CharArray"
+        s = _na.NDArray.sinfo(self)
+        s += "type: " + repr(self._type) + "\n"
+        return s
+
+    def _getitem(self, offset):
+        """_getitem(self, offset) returns  the "stripped" fixed length
+        string from 'self' at 'offset'.
+        """
+        if isinstance(self._data, memory.MemoryType):
+            s = buffer(self._data)[offset:offset+self._itemsize]
+            return self.strip(s)
+        else:
+            return self.strip(str(self._data[offset:offset+self._itemsize]))
+
+    def _setitem(self, offset, value):
+        """_setitem(self, offset) sets 'offset' to result of "padding" 'value'.
+
+        _setitem silently truncates inputs which are too long.
+
+        >>> s=array([""])
+        >>> s[0] = "this"
+        >>> s
+        CharArray([''])
+        >>> s=array(["this","that"])
+        >>> s[0] = "othe"
+        >>> s
+        CharArray(['othe', 'that'])
+        >>> s[1] = "v"
+        >>> s
+        CharArray(['othe', 'v'])
+        >>> s = array("")
+        >>> s[0] = "this"
+        >>> s
+        CharArray([''])
+        """
+        bo = offset
+        where = memory.writeable_buffer(self._data)
+        where[bo:bo+self._itemsize] = self.pad(value)[0:self._itemsize]
+
+    def _byteView(self):
+        """_byteView(self) returns a view of self as an array of bytes.
+        A _byteView cannot be taken from a chararray with itemsize==0.
+        """
+        if self._itemsize == 0:
+            raise ValueError("_byteView doesn't work for zero length items.")
+        b = _na.NumArray(buffer=self._data,
+                              shape=self._shape+(self._itemsize,),
+                              type=_na.UInt8,
+                              byteoffset=self._byteoffset,
+                              bytestride=self._bytestride)
+        b._strides = self._strides + (1,)
+        return b;
+
+    def _copyFrom(self, arr):
+        """
+        >>> c = array([""])
+        >>> c[:] = array(["this"])
+        >>> c
+        CharArray([''])
+        >>> c = array(["this","that"])
+        >>> c[:] = array(["a really long line","a"])
+        >>> c
+        CharArray(['a re', 'a'])
+        >>> c[:] = ["more","money"]
+        >>> c
+        CharArray(['more', 'mone'])
+        >>> c[:] = array(["x"])
+        >>> c
+        CharArray(['x', 'x'])
+        """
+        if self._itemsize == 0:
+            return
+
+        # Convert lists and strings to chararray.
+        arr = asarray(arr, itemsize=self._itemsize,
+                      padc=self._padc, kind=self.__class__)
+        
+        # Ensure shapes match.
+        arr = self._broadcast( arr )
+        if arr._itemsize == 0: return
+        
+        # Get views of both source and destination as UInt8 numarray.
+        it = arr._byteView()
+        me = self._byteView()
+        if self._itemsize <= arr._itemsize:
+            me[:] = it[..., :self._itemsize]
+        else:
+            me[...,:it._shape[-1]] = it
+            # zero fill the part of subarr *not* covered by arr
+            me[...,it._shape[-1]:] = 0
+
+    def copy(self):
+        """Return a new array with the same shape and type, but a copy
+        of the data
+
+        >>> c = fromlist(["this","that", "another"])
+        >>> d = c.copy()
+        >>> d
+        CharArray(['this', 'that', 'another'])
+        >>> int(c._data is d._data)
+        0
+        """
+        
+        arr = self.view()
+        arr._data = memory.new_memory(arr._itemsize * arr.nelements())
+        arr._byteoffset = 0
+        arr._bytestride = arr._itemsize
+        arr._strides = arr._stridesFromShape()
+        arr._itemsize = self._itemsize
+        if _na.product(self._shape):
+            copyfunction = _bytes.functionDict["copyNbytes"]
+            copyfunction(arr._shape, self._data, self._byteoffset,
+                         self._strides, arr._data, 0, arr._strides,
+                         arr._itemsize)
+        return arr
+
+    def substringView(self, i, j):
+        """substringView returns modified view of the input array which
+        represents only the [i:j] substring of each array element.
+
+        >>> c = fromlist([["this","that"],["another", "one"]])
+        >>> d = c.substringView(1, 2); d
+        CharArray([['h', 'h'],
+                   ['n', 'n']])
+        >>> d[:] = [["1","2"],["3","4"]]; d
+        CharArray([['1', '2'],
+                   ['3', '4']])
+        >>> c
+        CharArray([['t1is', 't2at'],
+                   ['a3other', 'o4e']])
+        """
+
+        n = _na.arange(self._itemsize)[i:j]
+        if len(n) != 0:
+            i = n[0]
+            j = n[-1]+1
+        else:
+            i = j = 0
+        r = self.view()
+        substr_size = j-i
+        if substr_size < 0:
+            substr_size = 0
+        r._itemsize = substr_size
+        r._byteoffset += i
+        return r
+        
+    def _broadcast(self, other):
+        return _na.NDArray._broadcast(self, other)
+
+    def _dualbroadcast(self, other):
+        s, o = _na.NDArray._dualbroadcast(self, other)
+        if not _na.product(s._strides):
+            s = s.copy()
+        if not _na.product(o._strides):
+            o = o.copy()
+        return s, o
+
+    def concatenate(self, other):
+        """concatenate(self, other) concatenates two numarray element by element
+        >>> array(["this", "that", "another"]).stripAll() + "."
+        CharArray(['this.', 'that.', 'another.'])
+        >>> array([""])+array([""])
+        CharArray([''])
+        """
+        a = asarray(other, padc=self._padc, kind=self.__class__)
+        self, a = self._dualbroadcast(a)
+        result = array(buffer=None, shape=self._shape,
+                       itemsize=self._itemsize+a._itemsize,
+                       padc=self._padc, kind=self.__class__)
+        if a is other:  # since stripAll() mutates the array
+            a = a.copy()
+        _chararray.Concat(self.__class__ is RawCharArray,
+                          self, a.stripAll(), result)
+        return result.padAll()
+
+    def __add__(self, other):
+        """
+        >>> map(str, range(3)) + array(["this","that","another one"])
+        CharArray(['0this', '1that', '2another one'])
+        >>> "" + array(["this", "that"])
+        CharArray(['this', 'that'])
+        >>> "prefix with trailing whitespace   " + array(["."])
+        CharArray(['prefix with trailing whitespace   .'])
+        >>> "" + array("")
+        CharArray([''])
+        >>> array(["this", "that", "another one"], kind=RawCharArray) + map(str, range(3))
+        RawCharArray(['this       0', 'that       1', 'another one2'])
+        """
+        return self.concatenate(other)
+
+    def __radd__(self, other):
+        return asarray(other, padc=self._padc,
+                       kind=self.__class__).concatenate(self)
+
+    def __iadd__(self, other):
+        self[:] = self.concatenate(other)
+        return self
+
+    def strip(self, value):
+        return value
+
+    def pad(self, value):
+        return value
+
+    def stripAll(self):
+        return self
+
+    def padAll(self):
+        return self
+
+    def _format(self, x):
+        """_format() formats a single array element for str() or repr()"""
+        return repr(self.strip(x))
+
+    def __cmp__(self, other):
+        s, t = str(self), str(other)
+        return cmp(s,t)
+
+    def fill(self, char):
+        """fill(self, char)   fills the array entirely with 'char'.
+
+        >>> x=array([""])
+        >>> x.fill(' ')
+        >>> x
+        CharArray([''])
+        >>> x=array(["this"])
+        >>> x.fill("x")
+        >>> x
+        CharArray(['xxxx'])
+        """
+        if self._itemsize and self.nelements():
+            if self.rank > 0:
+                self[:] = char*self._itemsize
+            else:
+                self[()] =  char*self._itemsize
+
+    def raw(self):
+        """raw(self) returns a raw view of self.
+        >>> c=fromlist(["this","that","another"])
+        >>> c.raw()
+        RawCharArray(['this   ', 'that   ', 'another'])
+        """
+        arr = self.view()
+        arr.__class__ = RawCharArray    # "Anchor" on RawCharArray.
+        return arr
+
+    def contiguous(self):
+        """contiguous(self) returns a version of self which is guaranteed to
+        be contiguous.  If self is contiguous already, it returns self.
+        Otherwise, it returns a copy of self.
+        """
+        if self.iscontiguous():
+            return self
+        else:
+            return self.copy()
+
+    def resized(self, n, fill='\0'):
+        """resized(self, n) returns a copy of self, resized so that each
+        item is of length n characters.  Extra characters are filled with
+        the value of 'fill'. If self._itemsize == n, self is returned.
+
+        >>> c = fromlist(["this","that","another"])
+        >>> c._itemsize
+        7
+        >>> d=c.resized(20)
+        >>> d
+        CharArray(['this', 'that', 'another'])
+        >>> d._itemsize
+        20
+        """
+        if self._itemsize != n:
+            ext = self.__class__(shape=self._shape, itemsize=n)
+            ext.fill(fill)
+            ext[:] = self
+            return ext
+        else:
+            return self
+
+    # First half of comparison operators,  slow version.
+    def _StrCmp(self, mode, raw, other0):
+        """_StrCmp(self, mode, raw, other0) brings 'self' and 'other0' to a
+        common shape and itemsize, then calls _chararray.StrCmp on the pair.
+        """
+        if not isinstance(other0, self.__class__):
+            other = asarray(other0, padc=self._padc, kind=self.__class__)
+        else:
+            other = other0
+        if self._shape != other._shape:
+            self, other = self._dualbroadcast(other)
+        if self._itemsize < other._itemsize:
+            self = self.resized(other._itemsize)
+        elif other._itemsize < self._itemsize:
+            other = other.resized(self._itemsize)
+        if self is None:  # NOTE(review): looks unreachable; self is used above
+            raise ValueError("Incompatible array dimensions")
+        return _chararray.StrCmp(self, mode, raw, other)
+
+    # rich comparisons (only works in Python 2.1 and later)
+    def __eq__(self, other):
+        """
+        >>> array(["this ", "thar", "other"]).__eq__(array(["this", "that", "another"]))
+        array([1, 0, 0], type=Bool)
+        >>> array([""]).__eq__(array([""]))
+        array([1], type=Bool)
+        >>> array([""]).__eq__(array(["x"]))
+        array([0], type=Bool)
+        >>> array(["x"]).__eq__(array([""]))
+        array([0], type=Bool)
+        """
+        return _chararray.StrCmp(self, 0, self._is_raw, other)
+
+    def __ne__(self, other):
+        """
+        >>> s=array(["this ", "thar", "other"])
+        >>> t=array(["this", "that", "another"])
+        >>> s.__ne__(t)
+        array([0, 1, 1], type=Bool)
+        """
+        return _chararray.StrCmp(self, 1, self._is_raw, other)
+
+    def __lt__(self, other):
+        """
+        >>> s=array(["this ", "thar", "other"])
+        >>> t=array(["this", "that", "another"])
+        >>> s.__lt__(t)
+        array([0, 1, 0], type=Bool)
+        """
+        return _chararray.StrCmp(self, 2, self._is_raw, other)
+
+    def __gt__(self, other):
+        """
+        >>> s=array(["this ", "thar", "other"])
+        >>> t=array(["this", "that", "another"])
+        >>> s.__gt__(t)
+        array([0, 0, 1], type=Bool)
+        """
+        return _chararray.StrCmp(self, 3, self._is_raw, other)
+
+    def __le__(self, other):
+        """
+        >>> s=array(["this ", "thar", "other"])
+        >>> t=array(["this", "that", "another"])
+        >>> s.__le__(t)
+        array([1, 1, 0], type=Bool)
+        """
+        return _chararray.StrCmp(self, 4, self._is_raw, other)
+
+    def __ge__(self, other):
+        """
+        >>> s=array(["this ", "thar", "other"])
+        >>> t=array(["this", "that", "another"])
+        >>> s.__ge__(t)
+        array([1, 0, 1], type=Bool)
+        """
+        return _chararray.StrCmp(self, 5, self._is_raw, other)
+
+    if sys.version_info >= (2,1,0):
+        def _test_rich_comparisons():
+            """
+            >>> s=array(["this ", "thar", "other"])
+            >>> t=array(["this", "that", "another"])
+            >>> s == t
+            array([1, 0, 0], type=Bool)
+            >>> s < t
+            array([0, 1, 0], type=Bool)
+            >>> s >= t
+            array([1, 0, 1], type=Bool)
+            >>> s <= t
+            array([1, 1, 0], type=Bool)
+            >>> s > t
+            array([0, 0, 1], type=Bool)
+            """
+            pass
+
+    def __contains__(self, str):
+        """
+        Returns 1 if-and-only-if 'self' has an element == to 'str'
+
+        >>> s=array(["this ", "thar", "other"])
+        >>> int("this" in s)
+        1
+        >>> int("tjt" in s)
+        0
+        >>> x=array([""])
+        >>> int("this" in x)
+        0
+        >>> int("" in x)
+        1
+        """
+        return _na.logical_or.reduce(_na.ravel(self.__eq__(str)))
+
+    def sort(self):
+        """
+        >>> a=fromlist(["other","this","that","another"])
+        >>> a.sort()
+        >>> a
+        CharArray(['another', 'other', 'that', 'this'])
+        """
+        l = self.tolist()
+        l.sort()
+        self[:] = fromlist(l)
+
+    def argsort(self, axis=-1):
+        """
+        >>> a=fromlist(["other","that","this","another"])
+        >>> a.argsort()
+        array([3, 0, 1, 2])
+        """
+        if axis != -1:
+            raise TypeError("CharArray.argsort() does not support the axis parameter.")
+        ax = range(len(self))
+        ax.sort(lambda x,y,z=self: cmp(z[x],z[y]))
+        return _na.array(ax)
+
+    def amap(self, f):
+        """amap() returns the nested list which results from applying
+        function 'f' to each element of 'self'.
+        """
+        if len(self._shape) == 1:
+            return [f(i) for i in self]
+        else:
+            ans = []
+            for i in self:
+                ans.append(i.amap(f))
+            return ans
+
+    def match(self, pattern, flags=0):
+        """
+        >>> a=fromlist([["wo","what"],["wen","erewh"]])
+        >>> a.match("wh[aebd]")
+        (array([0]), array([1]))
+        >>> a.match("none")
+        (array([], type=Long), array([], type=Long))
+        >>> b=array([""])
+        >>> b.match("this")
+        (array([], type=Long),)
+        >>> b.match("")
+        (array([0]),)
+        """
+        matcher = re.compile(pattern, flags).match
+        l = lambda x, f=matcher: int(f(x) is not None)
+        matches = _na.array(self.amap(l), type=_na.Bool)
+        if len(matches):
+            return _na.nonzero(matches)
+        else:
+            return ()
+
+    def search(self, pattern, flags=0):
+        """
+        >>> a=fromlist([["wo","what"],["wen","erewh"]])
+        >>> a.search("wh")
+        (array([0, 1]), array([1, 1]))
+        >>> a.search("1")
+        (array([], type=Long), array([], type=Long))
+        >>> b=array(["",""])
+        >>> b.search("1")
+        (array([], type=Long),)
+        >>> b.search("")
+        (array([0, 1]),)
+        """
+        searcher = re.compile(pattern, flags).search
+        l = lambda x, f=searcher: int(f(x) is not None)
+        matches = _na.array(self.amap(l), type=_na.Bool)
+        if len(matches):
+            return _na.nonzero(matches)
+        else:
+            return ()
+
+    def grep(self, pattern, flags=0):
+        """
+        >>> a=fromlist([["who","what"],["when","where"]])
+        >>> a.grep("whe")
+        CharArray(['when', 'where'])
+        """
+        return _gen.take(self, self.match(pattern, flags), axis=(0,))
+
+    def sub(self, pattern, replacement, flags=0, count=0):
+        """
+        >>> a=fromlist([["who","what"],["when","where"]])
+        >>> a.sub("wh", "ph")
+        >>> a
+        CharArray([['pho', 'phat'],
+                   ['phen', 'phere']])
+        """
+        cpat = re.compile(pattern, flags)
+        l = lambda x, p=cpat, r=replacement, c=count: re.sub(p, r, x, c)
+        self[:] = fromlist( self.amap(l) )
+
+    def eval(self):
+        """eval(self) converts CharArray 'self' into a NumArray.
+        This is the original slow implementation based on a Python loop
+        and the eval() function.
+
+        >>> array([["1","2"],["3","4"]]).eval()
+        array([[1, 2],
+               [3, 4]])
+        >>> try:
+        ...    array([["1","2"],["3","other"]]).eval()
+        ... except NameError:
+        ...    pass
+        """
+        n = _na.array([ eval(x,{},{}) for x in _na.ravel(self)])  # NOTE: runs eval() on each element; unsafe for untrusted data
+        n.setshape(self._shape)
+        return n
+
+    def fasteval(self, type=_na.Float64):
+
+        """fasteval(self, type=Float64) converts CharArray 'self' into
+        a NumArray of the specified type.  fasteval() can't convert
+        complex arrays at all, and loses precision when converting
+        UInt64 or Int64.
+
+        >>> array([["1","2"],["3","4"]]).fasteval().astype('Long')
+        array([[1, 2],
+               [3, 4]])
+        >>> try:
+        ...    array([["1","2"],["3","other"]]).fasteval()
+        ... except _chararray.error:
+        ...    pass
+        """
+        n = _na.array(shape=self._shape, type=_na.Float64)
+        type = _nt.getType(type)
+        _chararray.Eval((), self, n);
+        if type != _na.Float64:
+	    if ((type is _na.Int64) or 
+		(_numinclude.hasUInt64 and type is _na.UInt64)):
+                warnings.warn("Loss of precision converting to 64-bit type.  Consider using eval().", PrecisionWarning)
+            return n.astype(type)
+        else:
+            return n
+        
+class CharArray(RawCharArray):
+    """
+    >>> array("thisthatthe othe",shape=(4,),itemsize=4)
+    CharArray(['this', 'that', 'the', 'othe'])
+    >>> array("thisthatthe othe",shape=(4,),itemsize=4)._shape
+    (4,)
+    >>> array("thisthatthe othe",shape=(4,),itemsize=4)._itemsize
+    4
+    >>> array([["this","that"],["x","y"]])
+    CharArray([['this', 'that'],
+               ['x', 'y']])
+    >>> array([["this","that"],["x","y"]])._shape
+    (2, 2)
+    >>> array([["this","that"],["x","y"]])._itemsize
+    4
+    >>> s=array([["this","that"],["x","y"]], itemsize=10)
+    >>> s
+    CharArray([['this', 'that'],
+               ['x', 'y']])
+    >>> s._itemsize
+    10
+    >>> s[0][0]
+    'this'
+    >>> # s[1,1][0] = 'z' # Char assigment doesn't work!
+    >>> s[1,1] = 'z'      # But padding may do what you want.
+    >>> s                 # Otherwise, specify all of s[1,1] or subclass.
+    CharArray([['this', 'that'],
+               ['x', 'z']])
+    """
+
+    _is_raw = 0
+    
+    def resized(self, n, fill=' '):
+        """Same as RawCharArray.resized() but fills with blanks rather than
+        NUL."""        
+        return RawCharArray.resized(self, n, fill)
+    
+    def pad(self, value):
+        """
+        pad(self, value)   implements CharArray's string-filling policy
+        which is used when strings are assigned to elements of a CharArray.
+        Pad extends 'value' to length self._itemsize using spaces.
+        """
+        return _chararray.Pad(value, self._itemsize, ord(self._padc))
+
+    def strip(self, value):
+        """
+        strip(self, value) implements CharArray's string fetching
+        "cleanup" policy. strip truncates 'value' at the first NULL
+        and removes all trailing whitespace from the remainder.  For
+        compatibility with FITS, leading whitespace is never
+        completely stripped: a string beginning with a space always
+        returns at least one space to distinguish it from the empty
+        string.
+        """
+        return _chararray.Strip(value)
+
+    def stripAll(self):
+        """
+        stripAll(self) applies the chararray strip function to each element
+        of self and returns the result.  The result may be a new array.
+        """
+        _chararray.StripAll(None, self)
+        return self
+
+    def padAll(self):
+        """
+        padAll(self) applies the chararray pad function to each element
+        of self and returns the result.  The result may be a new array.
+        """
+        _chararray.PadAll(self._padc, self)
+        return self
+
+    def toUpper(self):
+        """toUpper(self) converts all elements of self to upper case
+
+        >>> a = fromlist(["That","this","another"])
+        >>> a.toUpper()
+        >>> a
+        CharArray(['THAT', 'THIS', 'ANOTHER'])
+        """
+        _chararray.ToUpper(None, self)
+
+    def toLower(self):
+        """toLower(self) converts all elements of self to lower case
+
+        >>> a = fromlist(["THAT","this","anOther"])
+        >>> a.toLower()
+        >>> a
+        CharArray(['that', 'this', 'another'])
+        """
+        _chararray.ToLower(None, self)
+
+    def maxLen(self):
+        """
+        maxLen(self) computes the length of the longest string in
+        self.  maxLen applies the strip function to each element
+        prior to computing its length.
+
+        >>> array(["this  ","that"]).maxLen()
+        4
+        """
+        n = _na.NumArray(buffer=None, shape=self.shape, type=_na.Int32)
+        _chararray.StrLen(None, self, n)
+        return n.max()
+
+    def truncated(self):
+        """
+        truncated(self) returns an array resized to the smallest itemsize
+        which will hold the stripped contents of self, i.e. self.maxLen().
+
+        >>> array(["this  ","that"])._itemsize
+        6
+        >>> array(["this  ","that"]).truncated()._itemsize
+        4
+        """
+        return self.resized(self.maxLen())
+
+    def count(self, s):
+        """count(self, s) counts the number of occurrences of string 's'.
+        >>> int(array(["this","that","another","this"]).count("this"))
+        2
+        """
+        return self.__eq__(s).sum('Int64')
+
+    def index(self, s):
+        """index(self, s) returns the index of the first occurenced of
+        's' in 'self'.
+
+        >>> array([["this","that","another"],
+        ...        ["another","this","that"]]).index("another")
+        (0, 2)
+        >>> array([["this","that","another"],
+        ...        ["another","this","that"]]).index("not here")
+        Traceback (most recent call last):
+        ValueError: string 'not here' not in array
+
+        """
+        indices = _na.nonzero(self.__eq__(s))
+        if len(indices[0]):
+            first = map(lambda x: x[0], indices)
+            return tuple(first)
+        else:
+            raise ValueError("string " + `s` +" not in array")
+
+def isString(s):
+    return isinstance(s, types.StringType)
+
+def isPySequence(s):
+    return hasattr(s, '__getitem__') and hasattr(s,'__len__')
+
+def _slistShape0(slist):
+    """_slistShape0(slist) computes the (shape+(itemsize,)) tuple
+    of string list 'slist'.
+
+    itemsize is set to the maximum of all string lengths in slist.
+
+    >>> s=["this","that","the other"]
+    >>> _slistShape(s)
+    ((3,), 9)
+    >>> _slistShape((s,s,s,s))
+    ((4, 3), 9)
+    >>> _slistShape(["this", ["that","other"]])
+    Traceback (most recent call last):
+    ...
+    ValueError: Nested sequences with different lengths.
+    """
+    if isinstance(slist, types.StringType):
+        return ((), len(slist),)
+    elif len(slist) == 0:
+        return ((0,), 0)
+    else:
+        maxs = _slistShape0(slist[0])
+        sizes = {}
+        for s in slist:
+            if isinstance(s, types.StringType):
+                maxs = max(maxs, ((), len(s)))
+                sizes[1] = 1  # ignore 
+            else:
+                maxs = max(maxs, _slistShape0(s))
+                sizes[len(s)] = 1
+        if len(sizes.keys()) != 1:
+            raise ValueError("Nested sequences with different lengths.")
+        return (((len(slist),)+ maxs[0]), maxs[1])
+
+def _slistShape(slist, itemsize=None, shape=None):
+    """_slistShape(slist, itemsize=None, shape=None)  computes the "natural"
+    shape and itemsize of slist, and combines this with the specified
+    itemsize and shape,  checking for consistency.
+
+    Specifying an itemsize overrides the slist's natural itemsize.
+
+    >>> _slistShape(["this","that"], itemsize=10)
+    ((2,), 10)
+    >>> _slistShape(["this","that"], itemsize=3)
+    ((2,), 3)
+
+    Specifying a shape checks for consistency against the slist's shape.
+
+    >>> _slistShape(["this","that"], shape=(2,1,1))
+    ((2, 1, 1), 4)
+    >>> _slistShape(["this","that"], shape=(3,2))
+    Traceback (most recent call last):
+    ...    
+    ValueError: Inconsistent list and shape
+    
+    """
+    shape_items = _slistShape0(slist)
+    if shape is None:
+        shape = shape_items[0]
+    else:
+        if _gen.product(shape) != _gen.product(shape_items[0]):
+            raise ValueError("Inconsistent list and shape")
+    if itemsize is None:
+        itemsize = shape_items[-1]
+    else:
+        pass # specified itemsize => padded extension or silent truncation.
+    return (shape, itemsize)
+
+def _pad(slist, n, c=" "):
+    """_pad(slist, n, c=' ') brings every string in the (possibly nested)
+    string list 'slist' to exactly 'n' characters, filling short strings on
+    the right with 'c', and returns all results concatenated into a single
+    string.  'slist' may also be a single string.
+
+    strings longer than n are *truncated*.
+
+    >>> _pad(["this","that","the other"],9," ")
+    'this     that     the other'
+    """
+    if not isinstance(slist, types.StringType):
+        # A sequence: pad each member (recursing into nested sequences)
+        # and glue the pieces together in order.
+        return "".join([_pad(member, n, c) for member in slist])
+    shortfall = n - len(slist)
+    if shortfall > 0:
+        return slist + c*shortfall
+    return slist[:n]
+
+def fromlist(slist, itemsize=None, shape=None, padc=" ", kind=CharArray):
+    """fromlist(slist, padc=" ") builds a CharArray (or other 'kind') from a
+    multi-dimensional list of strings, 'slist'.  Unless 'itemsize' is given,
+    every string is padded with 'padc' to the length of the longest one; an
+    explicit 'itemsize' pads or truncates each string to that length.
+
+    >>> s=fromlist([["this","that"],["x","y"]])
+    >>> s
+    CharArray([['this', 'that'],
+               ['x', 'y']])
+    >>> s[0][0]
+    'this'
+    >>> s[1][1]
+    'y'
+    >>> s[1][1] = "whom"
+    >>> s[1][1]
+    'whom'
+    >>> fromlist(['this', 'that'], itemsize=2)
+    CharArray(['th', 'th'])
+    >>> fromlist(['t','u'], itemsize=3)
+    CharArray(['t', 'u'])
+    >>> fromlist(['t','u'], itemsize=3)._itemsize
+    3
+    """
+    items = list(slist)  # convert tuples
+    shape, itemsize = _slistShape(items, itemsize=itemsize, shape=shape)
+    # Flatten the list into one string of fixed-width, padded items.
+    flat = _pad(items, itemsize, padc)
+    return fromstring(flat, shape=shape, itemsize=itemsize, padc=padc,
+                      kind=kind)
+
+def _stringToBuffer(datastring):
+    """_stringToBuffer(datastring) returns a newly allocated buffer of
+    len(datastring) bytes holding a copy of datastring.
+    """
+    buf = memory.new_memory(len(datastring))
+    writable = memory.writeable_buffer(buf)
+    writable[:] = datastring
+    return buf
+
+def fromstring(s, itemsize=None, shape=None, padc=" ", kind=CharArray):
+    """Create an array from binary data contained in a string (by copying)
+
+    An integer 'shape' is treated as a one-element tuple.  When the result
+    holds a single item (shape None or (1,)) and 'itemsize' exceeds len(s),
+    's' is first padded with 'padc'.  With neither shape nor itemsize, the
+    whole string becomes the only item.
+
+    >>> fromstring('thisthat', itemsize=4)
+    CharArray(['this', 'that'])
+    >>> fromstring('thisthat', shape=2)
+    CharArray(['this', 'that'])
+    >>> fromstring('this is a test', shape=(1,))
+    CharArray(['this is a test'])
+    """
+    if isinstance(shape, types.IntType):
+        shape = (shape,)
+    single_item = shape in [None, (1,)]
+    if single_item and itemsize is not None and itemsize > len(s):
+        s = _pad(s, itemsize, padc)
+    if shape is None and not itemsize:
+        shape, itemsize = (1,), len(s)
+    return kind(_stringToBuffer(s), shape=shape, itemsize=itemsize, padc=padc)
+
+
+def fromfile(file, itemsize=None, shape=None, padc=" ", kind=CharArray):
+    """Create an array from binary file data
+
+    If file is a string then that file is opened, else it is assumed
+    to be a file object. No options at the moment, all file positioning
+    must be done prior to this function call with a file object
+
+    The number of available bytes is the file size minus the current file
+    position.  With neither shape nor itemsize, the result is a single
+    item spanning all of those bytes.  A shape dimension of -1 is inferred
+    from the available bytes and requires itemsize.
+
+    A file opened here (from a file name) is always closed before
+    returning, including when an error is raised; a file object passed in
+    by the caller is never closed.
+
+    Raises ValueError if shape/itemsize are inconsistent, undefined, or
+    need more bytes than remain in the file, and IOError if fewer bytes
+    than expected could be read.
+
+    >>> import testdata
+    >>> s=fromfile(testdata.filename, shape=-1, itemsize=80)
+    >>> s[0]
+    'SIMPLE  =                    T / file does conform to FITS standard'
+    >>> s._shape
+    (108,)
+    >>> s._itemsize
+    80
+    """
+    if isinstance(shape, types.IntType):
+        shape = (shape,)
+
+    opened_here = False
+    if isString(file):
+        opened_here = True
+        file = open(file, 'rb')
+    try:
+        size = int(os.path.getsize(file.name) - file.tell())
+
+        if not shape and not itemsize:
+            shape = (1,)
+            itemsize = size
+        elif shape is not None:
+            if itemsize is not None:
+                shapesize = _na.product(shape)*itemsize
+                if shapesize < 0:
+                    # Exactly one dimension is -1: derive it from the
+                    # bytes that remain in the file.
+                    shape = list(shape)
+                    shape[ shape.index(-1) ] = size // -shapesize
+                    shape = tuple(shape)
+            else:
+                shapesize=_na.product(shape)
+                if shapesize < 0:
+                    raise ValueError("Shape dimension of -1 requires itemsize.")
+                itemsize = size // shapesize
+        elif itemsize:
+            shape = (size//itemsize,)
+        else:
+            raise ValueError("Must define shape or itemsize.")
+
+        nbytes = _na.product(shape)*itemsize
+
+        if nbytes > size:
+            raise ValueError(
+                    "Not enough bytes left in file for specified shape and type")
+
+        # create the array
+        arr = kind(None, shape=shape, itemsize=itemsize, padc=padc)
+        nbytesread = file.readinto(arr._data)
+        if nbytesread != nbytes:
+            raise IOError("Didn't read as many bytes as expected")
+        return arr
+    finally:
+        # Release the handle on every exit path, but only if we own it.
+        if opened_here:
+            file.close()
+
+def array(buffer=None, itemsize=None, shape=None, byteoffset=0,
+          bytestride=None, padc=" ", kind=CharArray):
+    """array(buffer=None, itemsize=None, shape=None) creates a new instance
+    of a CharArray (or of 'kind'), dispatching on the type of 'buffer'.
+
+    buffer      specifies the source of the array's initialization data.
+                type(buffer) is in [ None, CharArray, StringType,
+                                     ListType, FileType, BufferType].
+
+    itemsize    specifies the fixed maximum length of the array's strings.
+
+    shape       specifies the array dimensions.  An integer is treated as
+                a one-element tuple.
+
+    byteoffset, bytestride
+                may only be given when buffer is None or a suitable
+                buffer; otherwise ValueError is raised.
+
+    TypeError is raised for a buffer of any unsupported type.
+
+
+    >>> array(None,itemsize=3,shape=(1,))
+    CharArray([' '])
+    >>> array(buffer("abcdsedxxxxxncxn"), itemsize=2, byteoffset=4, bytestride=8)
+    CharArray(['se', 'nc'])
+    >>> array("abcd", itemsize=2)
+    CharArray(['ab', 'cd'])
+    >>> array(['this', 'that'], itemsize=10)
+    CharArray(['this', 'that'])
+    >>> array(['this', 'that'], itemsize=10)._itemsize
+    10
+    >>> array(array(['this', 'that'], itemsize=10))
+    CharArray(['this', 'that'])
+    >>> import testdata
+    >>> array(open(testdata.filename,"r"),itemsize=80, shape=2)
+    CharArray(['SIMPLE  =                    T / file does conform to FITS standard',
+               'BITPIX  =                   16 / number of bits per data pixel'])
+    """
+    if isinstance(shape, types.IntType):
+        shape = (shape,)
+
+    # Raw buffers (or no data at all) go straight to the constructor.
+    if buffer is None or _na.SuitableBuffer(buffer):
+        return kind(buffer, itemsize=itemsize, shape=shape,
+                    byteoffset=byteoffset, bytestride=bytestride,
+                    padc=padc)
+
+    if byteoffset or bytestride is not None:
+        raise ValueError('Offset and stride can only be specified if "buffer" is a buffer or None')
+
+    # Remaining cases are tried in order; each one returns on a match.
+    if isString(buffer):
+        return fromstring(buffer, itemsize=itemsize, shape=shape,
+                          padc=padc, kind=kind)
+    if isPySequence(buffer):
+        return fromlist(buffer, itemsize=itemsize, shape=shape,
+                        padc=padc, kind=kind)
+    if isinstance(buffer, kind) and buffer.__class__ is kind:
+        return buffer.copy()
+    if isinstance(buffer, RawCharArray):
+        return kind(buffer=buffer._data,
+                    itemsize=itemsize or buffer._itemsize,
+                    shape=shape or buffer._shape,
+                    padc=padc)
+    if isinstance(buffer, types.FileType):
+        return fromfile(buffer, itemsize=itemsize, shape=shape,
+                        padc=padc, kind=kind)
+    raise TypeError("Don't know how to handle that kind of buffer")
+
+def asarray(buffer=None, itemsize=None, shape=None, byteoffset=0,
+            bytestride=None, padc=" ", kind=CharArray):
+    """massages a sequence into a chararray.
+
+    A buffer whose class is exactly 'kind' is handed back as-is (no copy);
+    anything else is converted by array() with the same arguments.
+    """
+    already_kind = isinstance(buffer, kind) and buffer.__class__ is kind
+    if not already_kind:
+        return array(buffer, itemsize, shape, byteoffset, bytestride,
+                     padc, kind)
+    return buffer
+
+inputarray = asarray  # obsolete synonym
+
+def take(array, indices, outarr=None, axis=0, clipmode=_na.RAISE):
+    # Convert the input with asarray() first, then defer to the generic take.
+    return _gen.take(asarray(array), indices, outarr, axis, clipmode)
+# This thin wrapper borrows the generic routine's documentation.
+take.__doc__ = _gen.take.__doc__
+
+def put(array, indices, values, axis=0, clipmode=_na.RAISE):
+    # Both the target and the values are converted with asarray() before
+    # deferring to the generic put.
+    return _gen.put(asarray(array), indices, asarray(values), axis, clipmode)
+# This thin wrapper borrows the generic routine's documentation.
+put.__doc__ = _gen.put.__doc__
+
+def _bufferItems(buffer, offset=0, bytestride=None, itemsize=None):
+    """Count the items available in 'buffer' starting at byte 'offset'.
+
+    Without a bytestride the items are taken to be contiguous: the count is
+    the remaining byte length divided by itemsize (or the remaining byte
+    length itself when itemsize is None).  With a bytestride, the count is
+    the number of whole strides plus one more item if the leftover bytes
+    can still hold an item of 'itemsize' bytes; itemsize is then required.
+
+    >>> _bufferItems(buffer("0123456789"), offset=2, bytestride=4, itemsize=1)
+    2
+    >>> _bufferItems(buffer("0123456789"), offset=1, bytestride=2, itemsize=1)
+    5
+    >>> _bufferItems(buffer("0123456789"), bytestride=5, itemsize=2)
+    2
+    >>> _bufferItems(buffer("0123456789"), bytestride=None, itemsize=2)
+    5
+    >>> _bufferItems(buffer("0123456789"), offset=3)
+    7
+    >>> _bufferItems(buffer("abcdsedxxxxxncxn"),itemsize=2,offset=4,bytestride=8)
+    2
+    >>> cc=CharArray(buffer('abcdef'*5),shape=(2,),itemsize=2,byteoffset=8,bytestride=20,aligned=0)
+    >>> cc
+    CharArray(['cd', 'ef'])
+    """
+    if bytestride is not None:
+        if itemsize is None:
+            raise ValueError("Must specify itemsize if bytestride is specified.")
+        strides = (len(buffer)-offset)*1.0/bytestride
+        whole = int(strides)
+        # Bytes left over after the last whole stride, rounded to nearest.
+        leftover = int((strides-whole)*bytestride+0.5)
+        return whole +  (leftover >= itemsize)
+    if itemsize is None:
+        return len(buffer) - offset
+    if itemsize:
+        return (len(buffer)-offset)/itemsize
+    return 1 # Hack to permit 0 length strings
+
+def num2char(n, format, itemsize=32):
+    """num2char formats numeric array 'n' into a CharArray using 'format'
+
+    'n' is converted with _na.asarray; the result has the same shape as
+    'n' and strings of length 'itemsize'.
+
+    >>> num2char(_na.arange(0.0,5), '%2.2f')
+    CharArray(['0.00', '1.00', '2.00', '3.00', '4.00'])
+    >>> num2char(_na.arange(0.0,5), '%d')
+    CharArray(['0', '1', '2', '3', '4'])
+    >>> num2char(_na.arange(5), "%02d")
+    CharArray(['00', '01', '02', '03', '04'])
+
+    Limitations:
+
+    1. When formatted values are too large to fit into strings of
+    length itemsize, the values are truncated, possibly losing
+    significant information.
+
+    2. Complex numbers are not supported (NotImplementedError is raised).
+
+    """
+    n = _na.asarray(n)
+
+    if isinstance(n.type(), _nt.ComplexType):
+        raise NotImplementedError("num2char doesn't support complex types yet.")
+
+    # The formatter is always handed Float64 data; convert only when needed.
+    as_float = n
+    if not (n.type() == _na.Float64):
+        as_float = n.astype(_na.Float64)
+    result = CharArray(shape=n.getshape(), itemsize=itemsize)
+    _chararray.Format(format, as_float, result)
+    return result
+
+def _nothing(*args):
+    """Return 0 for any arguments; used by test() as doctest's 'isprivate'
+    predicate."""
+    return 0  # Test everything... nothing private.
+
+def test():
+    """Run the doctests of the 'strings' module and return the result.
+
+    Before Python 2.4 this uses doctest.testmod with _nothing as the
+    'isprivate' predicate; from 2.4 on it uses numarray.numtest.Tester.
+    """
+    # NOTE(review): 'strings' looks like the numarray name of this module,
+    # left over from the port -- confirm it still resolves here.
+    if sys.version_info < (2,4):
+        import doctest, strings
+        return doctest.testmod(strings, isprivate=_nothing)
+    else:
+        import numarray.numtest as nt, strings
+        t  = nt.Tester(globs=globals())
+        t.rundoc(strings)
+        return t.summarize()
+
+# Run the module's doctests when executed as a script.
+if __name__ == "__main__":
+    test()
author	Travis Oliphant <oliphant@enthought.com>	2005-11-02 23:54:48 +0000
committer	Travis Oliphant <oliphant@enthought.com>	2005-11-02 23:54:48 +0000
commit	e484db1b4e730d58a395381aa1b4a98dee834f59 (patch)
tree	baab5dff644bfa65690aeeb776630a7fba306e68 /scipy/base/chararray.py
parent	0e488269b5feebe4fb47380fdf19a21da95b1b70 (diff)
download	numpy-e484db1b4e730d58a395381aa1b4a98dee834f59.tar.gz