diff options
author | Travis Oliphant <oliphant@enthought.com> | 2005-11-02 23:54:48 +0000 |
---|---|---|
committer | Travis Oliphant <oliphant@enthought.com> | 2005-11-02 23:54:48 +0000 |
commit | e484db1b4e730d58a395381aa1b4a98dee834f59 (patch) | |
tree | baab5dff644bfa65690aeeb776630a7fba306e68 /scipy/base/chararray.py | |
parent | 0e488269b5feebe4fb47380fdf19a21da95b1b70 (diff) | |
download | numpy-e484db1b4e730d58a395381aa1b4a98dee834f59.tar.gz |
Altered flexible types so that NotImplemented is raised in case another type wants to handle it.
Diffstat (limited to 'scipy/base/chararray.py')
-rw-r--r-- | scipy/base/chararray.py | 1234 |
1 files changed, 1234 insertions, 0 deletions
diff --git a/scipy/base/chararray.py b/scipy/base/chararray.py new file mode 100644 index 000000000..93210a797 --- /dev/null +++ b/scipy/base/chararray.py @@ -0,0 +1,1234 @@ +# TODO: This needs to be +# fleshed out +# Adapted from Numarray by J. Todd Miller +""" +Large chararray test +>>> xx=array(None,itemsize=3,shape=220000) + +>>> import cPickle +>>> c=cPickle.loads(cPickle.dumps(fromlist(["this","that","something else"]))) +>>> c +CharArray(['this', 'that', 'something else']) +>>> c._type +CharArrayType(14) + +>>> a=fromlist(["this"]*25); a.shape=(5,5); a[ range(2,4) ] +CharArray([['this', 'this', 'this', 'this', 'this'], + ['this', 'this', 'this', 'this', 'this']]) +>>> a[ range(2,4) ] = fromlist(["that"]); a +CharArray([['this', 'this', 'this', 'this', 'this'], + ['this', 'this', 'this', 'this', 'this'], + ['that', 'that', 'that', 'that', 'that'], + ['that', 'that', 'that', 'that', 'that'], + ['this', 'this', 'this', 'this', 'this']]) + +>>> array([], shape=(0,1,2)) +CharArray([]) + +>>> a = _gen.concatenate([array(["1"]*3), array(["2"]*3)]); a +CharArray(['1', '1', '1', '2', '2', '2']) +>>> _gen.reshape(a, (2,3)) +CharArray([['1', '1', '1'], + ['2', '2', '2']]) + +>>> CharArray(buffer="thatthis", shape=(2,), itemsize=4, +... 
bytestride=-4, byteoffset=4) +CharArray(['this', 'that']) +""" + +from copy import _EmptyClass +import memory +import _bytes +import _chararray +import numarrayall as _na +import generic as _gen +import sys +import string +import types +import os +import re +import arrayprint +import warnings +import numerictypes as _nt +import numinclude as _numinclude +import _ndarray as _nda + +_globals, _locals = globals, locals + +class CharArrayType: + def __init__(self, itemsize): + self.itemsize = itemsize + self.name = "CharArrayType(%d)" % (self.itemsize,) + + def __repr__(self): + return self.name + +CharArrayTypeCache = {} + +def NewCharArrayType(itemsize): + """NewCharArrayType creates CharArrayTypes on demand, but checks to see + if they already exist in the cache first. This makes type equivalence + the same as object identity. + """ + if itemsize not in CharArrayTypeCache.keys(): + CharArrayTypeCache[itemsize] = CharArrayType(itemsize) + return CharArrayTypeCache[itemsize] + + +class PrecisionWarning(UserWarning): + pass + + +class RawCharArray(_na.NDArray): + """RawCharArray(buffer=None, shape=None, byteoffset=0, bytestride=None) + + RawCharArray is a fixed length array of characters based on + ndarray.NDArray with no automatic stripping or padding. + + itemsize specifies the length of all strings in the array. 
+ """ + _is_raw = 1 + def __init__(self, buffer=None, itemsize=None, shape=None, byteoffset=0, + bytestride=None, aligned=1, type=None, padc=" "): + + if isinstance(shape, types.IntType): + shape = (shape,) + + if type is not None: + if itemsize is not None: + raise ValueError("Specify type *or* itemsize, not both.") + itemsize = type.itemsize + + if not (padc, types.StringType) or len(padc) <> 1: + raise ValueError("padc must be a string of length 1.") + + if buffer is None: + if shape is None or itemsize is None: + raise ValueError("Must define both shape & itemsize if buffer is None") + else: + if shape is None and itemsize is None: + raise ValueError("Must specify shape, itemsize, or both.") + ni = _bufferItems( buffer, byteoffset, bytestride, itemsize) + if shape and itemsize == None: + itemsize = ni/_na.product(shape) + if itemsize and shape == None: + shape = (ni,) + if itemsize == 0: # Another hack for 0 length strings. + bytestride=0 + + if not _nda.is_buffer(buffer) and buffer is not None: + raise TypeError("buffer must either support the C buffer protocol or return something that does from its __buffer__() method"); + + _na.NDArray.__init__(self, shape=shape, itemsize=itemsize, + buffer=buffer, byteoffset=byteoffset, + bytestride=bytestride, aligned=aligned) + + self._flags |= _gen._UPDATEDICT + + if type is None: + type = NewCharArrayType(itemsize) + + self._type = type + self._padc = padc + + if buffer is None: + self.fill(" ") + + def __get_array_typestr__(self): + return '|S%d' % self._itemsize + + __array_typestr__ = property(__get_array_typestr__, None, "") + + def tolist(self): + """returns array as a (nested) list of strings.""" + if len(self._shape) == 1: + if self._shape[0] > 0: + return [ x for x in self ] + else: + return [] + else: + return [ ni.tolist() for ni in self ] + + def __getstate__(self): + """returns RawCharArray state dictionary for pickling""" + state = _na.NDArray.__getstate__(self) + state["_type"] = self._type.itemsize + 
return state + + def __setstate__(self, state): + """sets RawCharArray dictionary from pickled state""" + _na.NDArray.__setstate__(self, state) + self._type = NewCharArrayType(state["_type"]) + + def isbyteswapped(self): + """CharArray's are *never* byteswapped. returns 0.""" + return 0 + + def isaligned(self): + """CharArray's are *always* aligned. returns 1.""" + return 1 + + def sinfo(self): + "returns string describing a CharArray" + s = _na.NDArray.sinfo(self) + s += "type: " + repr(self._type) + "\n" + return s + + def _getitem(self, offset): + """_getitem(self, offset) returns the "stripped" fixed length + string from 'self' at 'offset'. + """ + if isinstance(self._data, memory.MemoryType): + s = buffer(self._data)[offset:offset+self._itemsize] + return self.strip(s) + else: + return self.strip(str(self._data[offset:offset+self._itemsize])) + + def _setitem(self, offset, value): + """_setitem(self, offset) sets 'offset' to result of "padding" 'value'. + + _setitem silently truncates inputs which are too long. + + >>> s=array([""]) + >>> s[0] = "this" + >>> s + CharArray(['']) + >>> s=array(["this","that"]) + >>> s[0] = "othe" + >>> s + CharArray(['othe', 'that']) + >>> s[1] = "v" + >>> s + CharArray(['othe', 'v']) + >>> s = array("") + >>> s[0] = "this" + >>> s + CharArray(['']) + """ + bo = offset + where = memory.writeable_buffer(self._data) + where[bo:bo+self._itemsize] = self.pad(value)[0:self._itemsize] + + def _byteView(self): + """_byteView(self) returns a view of self as an array of bytes. + A _byteView cannot be taken from a chararray with itemsize==0. 
+ """ + if self._itemsize == 0: + raise ValueError("_byteView doesn't work for zero length items.") + b = _na.NumArray(buffer=self._data, + shape=self._shape+(self._itemsize,), + type=_na.UInt8, + byteoffset=self._byteoffset, + bytestride=self._bytestride) + b._strides = self._strides + (1,) + return b; + + def _copyFrom(self, arr): + """ + >>> c = array([""]) + >>> c[:] = array(["this"]) + >>> c + CharArray(['']) + >>> c = array(["this","that"]) + >>> c[:] = array(["a really long line","a"]) + >>> c + CharArray(['a re', 'a']) + >>> c[:] = ["more","money"] + >>> c + CharArray(['more', 'mone']) + >>> c[:] = array(["x"]) + >>> c + CharArray(['x', 'x']) + """ + if self._itemsize == 0: + return + + # Convert lists and strings to chararray. + arr = asarray(arr, itemsize=self._itemsize, + padc=self._padc, kind=self.__class__) + + # Ensure shapes match. + arr = self._broadcast( arr ) + if arr._itemsize == 0: return + + # Get views of both source and destination as UInt8 numarray. + it = arr._byteView() + me = self._byteView() + if self._itemsize <= arr._itemsize: + me[:] = it[..., :self._itemsize] + else: + me[...,:it._shape[-1]] = it + # zero fill the part of subarr *not* covered by arr + me[...,it._shape[-1]:] = 0 + + def copy(self): + """Return a new array with the same shape and type, but a copy + of the data + + >>> c = fromlist(["this","that", "another"]) + >>> d = c.copy() + >>> d + CharArray(['this', 'that', 'another']) + >>> int(c._data is d._data) + 0 + """ + + arr = self.view() + arr._data = memory.new_memory(arr._itemsize * arr.nelements()) + arr._byteoffset = 0 + arr._bytestride = arr._itemsize + arr._strides = arr._stridesFromShape() + arr._itemsize = self._itemsize + if _na.product(self._shape): + copyfunction = _bytes.functionDict["copyNbytes"] + copyfunction(arr._shape, self._data, self._byteoffset, + self._strides, arr._data, 0, arr._strides, + arr._itemsize) + return arr + + def substringView(self, i, j): + """substringView returns modified view of the 
input array which + represents only the [i:j] substring of each array element. + + >>> c = fromlist([["this","that"],["another", "one"]]) + >>> d = c.substringView(1, 2); d + CharArray([['h', 'h'], + ['n', 'n']]) + >>> d[:] = [["1","2"],["3","4"]]; d + CharArray([['1', '2'], + ['3', '4']]) + >>> c + CharArray([['t1is', 't2at'], + ['a3other', 'o4e']]) + """ + + n = _na.arange(self._itemsize)[i:j] + if len(n) != 0: + i = n[0] + j = n[-1]+1 + else: + i = j = 0 + r = self.view() + substr_size = j-i + if substr_size < 0: + substr_size = 0 + r._itemsize = substr_size + r._byteoffset += i + return r + + def _broadcast(self, other): + return _na.NDArray._broadcast(self, other) + + def _dualbroadcast(self, other): + s, o = _na.NDArray._dualbroadcast(self, other) + if not _na.product(s._strides): + s = s.copy() + if not _na.product(o._strides): + o = o.copy() + return s, o + + def concatenate(self, other): + """concatenate(self, other) concatenates two numarray element by element + >>> array(["this", "that", "another"]).stripAll() + "." 
+ CharArray(['this.', 'that.', 'another.']) + >>> array([""])+array([""]) + CharArray(['']) + """ + a = asarray(other, padc=self._padc, kind=self.__class__) + self, a = self._dualbroadcast(a) + result = array(buffer=None, shape=self._shape, + itemsize=self._itemsize+a._itemsize, + padc=self._padc, kind=self.__class__) + if a is other: # since stripAll() mutates the array + a = a.copy() + _chararray.Concat(self.__class__ is RawCharArray, + self, a.stripAll(), result) + return result.padAll() + + def __add__(self, other): + """ + >>> map(str, range(3)) + array(["this","that","another one"]) + CharArray(['0this', '1that', '2another one']) + >>> "" + array(["this", "that"]) + CharArray(['this', 'that']) + >>> "prefix with trailing whitespace " + array(["."]) + CharArray(['prefix with trailing whitespace .']) + >>> "" + array("") + CharArray(['']) + >>> array(["this", "that", "another one"], kind=RawCharArray) + map(str, range(3)) + RawCharArray(['this 0', 'that 1', 'another one2']) + """ + return self.concatenate(other) + + def __radd__(self, other): + return asarray(other, padc=self._padc, + kind=self.__class__).concatenate(self) + + def __iadd__(self, other): + self[:] = self.concatenate(other) + return self + + def strip(self, value): + return value + + def pad(self, value): + return value + + def stripAll(self): + return self + + def padAll(self): + return self + + def _format(self, x): + """_format() formats a single array element for str() or repr()""" + return repr(self.strip(x)) + + def __cmp__(self, other): + s, t = str(self), str(other) + return cmp(s,t) + + def fill(self, char): + """fill(self, char) fills the array entirely with 'char'. 
+ + >>> x=array([""]) + >>> x.fill(' ') + >>> x + CharArray(['']) + >>> x=array(["this"]) + >>> x.fill("x") + >>> x + CharArray(['xxxx']) + """ + if self._itemsize and self.nelements(): + if self.rank > 0: + self[:] = char*self._itemsize + else: + self[()] = char*self._itemsize + + def raw(self): + """raw(self) returns a raw view of self. + >>> c=fromlist(["this","that","another"]) + >>> c.raw() + RawCharArray(['this ', 'that ', 'another']) + """ + arr = self.view() + arr.__class__ = RawCharArray # "Anchor" on RawCharArray. + return arr + + def contiguous(self): + """contiguous(self) returns a version of self which is guaranteed to + be contiguous. If self is contiguous already, it returns self. + Otherwise, it returns a copy of self. + """ + if self.iscontiguous(): + return self + else: + return self.copy() + + def resized(self, n, fill='\0'): + """resized(self, n) returns a copy of self, resized so that each + item is of length n characters. Extra characters are filled with + the value of 'fill'. If self._itemsize == n, self is returned. + + >>> c = fromlist(["this","that","another"]) + >>> c._itemsize + 7 + >>> d=c.resized(20) + >>> d + CharArray(['this', 'that', 'another']) + >>> d._itemsize + 20 + """ + if self._itemsize != n: + ext = self.__class__(shape=self._shape, itemsize=n) + ext.fill(fill) + ext[:] = self + return ext + else: + return self + + # First half of comparison operators, slow version. + def _StrCmp(self, mode, raw, other0): + """StrCmp(self, other0) calls strncmp on corresponding items of the + two numarray, self and other. 
+ """ + if not isinstance(other0, self.__class__): + other = asarray(other0, padc=self._padc, kind=self.__class__) + else: + other = other0 + if self._shape != other._shape: + self, other = self._dualbroadcast(other) + if self._itemsize < other._itemsize: + self = self.resized(other._itemsize) + elif other._itemsize < self._itemsize: + other = other.resized(self._itemsize) + if self is None: + raise ValueError("Incompatible array dimensions") + return _chararray.StrCmp(self, mode, raw, other) + + # rich comparisons (only works in Python 2.1 and later) + def __eq__(self, other): + """ + >>> array(["this ", "thar", "other"]).__eq__(array(["this", "that", "another"])) + array([1, 0, 0], type=Bool) + >>> array([""]).__eq__(array([""])) + array([1], type=Bool) + >>> array([""]).__eq__(array(["x"])) + array([0], type=Bool) + >>> array(["x"]).__eq__(array([""])) + array([0], type=Bool) + """ + return _chararray.StrCmp(self, 0, self._is_raw, other) + + def __ne__(self, other): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) + >>> s.__ne__(t) + array([0, 1, 1], type=Bool) + """ + return _chararray.StrCmp(self, 1, self._is_raw, other) + + def __lt__(self, other): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) + >>> s.__lt__(t) + array([0, 1, 0], type=Bool) + """ + return _chararray.StrCmp(self, 2, self._is_raw, other) + + def __gt__(self, other): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) + >>> s.__gt__(t) + array([0, 0, 1], type=Bool) + """ + return _chararray.StrCmp(self, 3, self._is_raw, other) + + def __le__(self, other): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) + >>> s.__le__(t) + array([1, 1, 0], type=Bool) + """ + return _chararray.StrCmp(self, 4, self._is_raw, other) + + def __ge__(self, other): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) 
+ >>> s.__ge__(t) + array([1, 0, 1], type=Bool) + """ + return _chararray.StrCmp(self, 5, self._is_raw, other) + + if sys.version_info >= (2,1,0): + def _test_rich_comparisons(): + """ + >>> s=array(["this ", "thar", "other"]) + >>> t=array(["this", "that", "another"]) + >>> s == t + array([1, 0, 0], type=Bool) + >>> s < t + array([0, 1, 0], type=Bool) + >>> s >= t + array([1, 0, 1], type=Bool) + >>> s <= t + array([1, 1, 0], type=Bool) + >>> s > t + array([0, 0, 1], type=Bool) + """ + pass + + def __contains__(self, str): + """ + Returns 1 if-and-only-if 'self' has an element == to 'str' + + >>> s=array(["this ", "thar", "other"]) + >>> int("this" in s) + 1 + >>> int("tjt" in s) + 0 + >>> x=array([""]) + >>> int("this" in x) + 0 + >>> int("" in x) + 1 + """ + return _na.logical_or.reduce(_na.ravel(self.__eq__(str))) + + def sort(self): + """ + >>> a=fromlist(["other","this","that","another"]) + >>> a.sort() + >>> a + CharArray(['another', 'other', 'that', 'this']) + """ + l = self.tolist() + l.sort() + self[:] = fromlist(l) + + def argsort(self, axis=-1): + """ + >>> a=fromlist(["other","that","this","another"]) + >>> a.argsort() + array([3, 0, 1, 2]) + """ + if axis != -1: + raise TypeError("CharArray.argsort() does not support the axis parameter.") + ax = range(len(self)) + ax.sort(lambda x,y,z=self: cmp(z[x],z[y])) + return _na.array(ax) + + def amap(self, f): + """amap() returns the nested list which results from applying + function 'f' to each element of 'self'. 
+ """ + if len(self._shape) == 1: + return [f(i) for i in self] + else: + ans = [] + for i in self: + ans.append(i.amap(f)) + return ans + + def match(self, pattern, flags=0): + """ + >>> a=fromlist([["wo","what"],["wen","erewh"]]) + >>> a.match("wh[aebd]") + (array([0]), array([1])) + >>> a.match("none") + (array([], type=Long), array([], type=Long)) + >>> b=array([""]) + >>> b.match("this") + (array([], type=Long),) + >>> b.match("") + (array([0]),) + """ + matcher = re.compile(pattern, flags).match + l = lambda x, f=matcher: int(f(x) is not None) + matches = _na.array(self.amap(l), type=_na.Bool) + if len(matches): + return _na.nonzero(matches) + else: + return () + + def search(self, pattern, flags=0): + """ + >>> a=fromlist([["wo","what"],["wen","erewh"]]) + >>> a.search("wh") + (array([0, 1]), array([1, 1])) + >>> a.search("1") + (array([], type=Long), array([], type=Long)) + >>> b=array(["",""]) + >>> b.search("1") + (array([], type=Long),) + >>> b.search("") + (array([0, 1]),) + """ + searcher = re.compile(pattern, flags).search + l = lambda x, f=searcher: int(f(x) is not None) + matches = _na.array(self.amap(l), type=_na.Bool) + if len(matches): + return _na.nonzero(matches) + else: + return () + + def grep(self, pattern, flags=0): + """ + >>> a=fromlist([["who","what"],["when","where"]]) + >>> a.grep("whe") + CharArray(['when', 'where']) + """ + return _gen.take(self, self.match(pattern, flags), axis=(0,)) + + def sub(self, pattern, replacement, flags=0, count=0): + """ + >>> a=fromlist([["who","what"],["when","where"]]) + >>> a.sub("wh", "ph") + >>> a + CharArray([['pho', 'phat'], + ['phen', 'phere']]) + """ + cpat = re.compile(pattern, flags) + l = lambda x, p=cpat, r=replacement, c=count: re.sub(p, r, x, c) + self[:] = fromlist( self.amap(l) ) + + def eval(self): + """eval(self) converts CharArray 'self' into a NumArray. + This is the original slow implementation based on a Python loop + and the eval() function. 
+ + >>> array([["1","2"],["3","4"]]).eval() + array([[1, 2], + [3, 4]]) + >>> try: + ... array([["1","2"],["3","other"]]).eval() + ... except NameError: + ... pass + """ + n = _na.array([ eval(x,{},{}) for x in _na.ravel(self)]) + n.setshape(self._shape) + return n + + def fasteval(self, type=_na.Float64): + + """fasteval(self, type=Float64) converts CharArray 'self' into + a NumArray of the specified type. fasteval() can't convert + complex arrays at all, and loses precision when converting + UInt64 or Int64. + + >>> array([["1","2"],["3","4"]]).fasteval().astype('Long') + array([[1, 2], + [3, 4]]) + >>> try: + ... array([["1","2"],["3","other"]]).fasteval() + ... except _chararray.error: + ... pass + """ + n = _na.array(shape=self._shape, type=_na.Float64) + type = _nt.getType(type) + _chararray.Eval((), self, n); + if type != _na.Float64: + if ((type is _na.Int64) or + (_numinclude.hasUInt64 and type is _na.UInt64)): + warnings.warn("Loss of precision converting to 64-bit type. Consider using eval().", PrecisionWarning) + return n.astype(type) + else: + return n + +class CharArray(RawCharArray): + """ + >>> array("thisthatthe othe",shape=(4,),itemsize=4) + CharArray(['this', 'that', 'the', 'othe']) + >>> array("thisthatthe othe",shape=(4,),itemsize=4)._shape + (4,) + >>> array("thisthatthe othe",shape=(4,),itemsize=4)._itemsize + 4 + >>> array([["this","that"],["x","y"]]) + CharArray([['this', 'that'], + ['x', 'y']]) + >>> array([["this","that"],["x","y"]])._shape + (2, 2) + >>> array([["this","that"],["x","y"]])._itemsize + 4 + >>> s=array([["this","that"],["x","y"]], itemsize=10) + >>> s + CharArray([['this', 'that'], + ['x', 'y']]) + >>> s._itemsize + 10 + >>> s[0][0] + 'this' + >>> # s[1,1][0] = 'z' # Char assigment doesn't work! + >>> s[1,1] = 'z' # But padding may do what you want. + >>> s # Otherwise, specify all of s[1,1] or subclass. 
+ CharArray([['this', 'that'], + ['x', 'z']]) + """ + + _is_raw = 0 + + def resized(self, n, fill=' '): + """Same as RawCharArray.resized() but fills with blanks rather than + NUL.""" + return RawCharArray.resized(self, n, fill) + + def pad(self, value): + """ + pad(self, value) implements CharArray's string-filling policy + which is used when strings are assigned to elements of a CharArray. + Pad extends 'value' to length self._itemsize using the pad character self._padc. + """ + return _chararray.Pad(value, self._itemsize, ord(self._padc)) + + def strip(self, value): + """ + strip(self, value) implements CharArray's string fetching + "cleanup" policy. strip truncates 'value' at the first NULL + and removes all trailing whitespace from the remainder. For + compatibility with FITS, leading whitespace is never + completely stripped: a string beginning with a space always + returns at least one space to distinguish it from the empty + string. + """ + return _chararray.Strip(value) + + def stripAll(self): + """ + stripAll(self) applies the chararray strip function to each element + of self and returns the result. The result may be a new array. + """ + _chararray.StripAll(None, self) + return self + + def padAll(self): + """ + padAll(self) applies the chararray pad function to each element + of self and returns the result. The result may be a new array. + """ + _chararray.PadAll(self._padc, self) + return self + + def toUpper(self): + """toUpper(self) converts all elements of self to upper case + + >>> a = fromlist(["That","this","another"]) + >>> a.toUpper() + >>> a + CharArray(['THAT', 'THIS', 'ANOTHER']) + """ + _chararray.ToUpper(None, self) + + def toLower(self): + """toLower(self) converts all elements of self to lower case + + >>> a = fromlist(["THAT","this","anOther"]) + >>> a.toLower() + >>> a + CharArray(['that', 'this', 'another']) + """ + _chararray.ToLower(None, self) + + def maxLen(self): + """ + maxLen(self) computes the length of the longest string in + self. 
maxLen applies the strip function to each element + prior to computing its length. + + >>> array(["this ","that"]).maxLen() + 4 + """ + n = _na.NumArray(buffer=None, shape=self.shape, type=_na.Int32) + _chararray.StrLen(None, self, n) + return n.max() + + def truncated(self): + """ + truncated(self) returns an array with the smallest possible itemsize + which will hold the stripped contents of self. + + >>> array(["this ","that"])._itemsize + 6 + >>> array(["this ","that"]).truncated()._itemsize + 4 + """ + return self.resized(self.maxLen()) + + def count(self, s): + """count(self, s) counts the number of occurrences of string 's'. + >>> int(array(["this","that","another","this"]).count("this")) + 2 + """ + return self.__eq__(s).sum('Int64') + + def index(self, s): + """index(self, s) returns the index of the first occurrence of + 's' in 'self'. + + >>> array([["this","that","another"], + ... ["another","this","that"]]).index("another") + (0, 2) + >>> array([["this","that","another"], + ... ["another","this","that"]]).index("not here") + Traceback (most recent call last): + ValueError: string 'not here' not in array + + """ + indices = _na.nonzero(self.__eq__(s)) + if len(indices[0]): + first = map(lambda x: x[0], indices) + return tuple(first) + else: + raise ValueError("string " + `s` +" not in array") + +def isString(s): + return isinstance(s, types.StringType) + +def isPySequence(s): + return hasattr(s, '__getitem__') and hasattr(s,'__len__') + +def _slistShape0(slist): + """_slistShape0(slist) computes the (shape, itemsize) pair + of string list 'slist'. + + itemsize is set to the maximum of all string lengths in slist. + + >>> s=["this","that","the other"] + >>> _slistShape(s) + ((3,), 9) + >>> _slistShape((s,s,s,s)) + ((4, 3), 9) + >>> _slistShape(["this", ["that","other"]]) + Traceback (most recent call last): + ... + ValueError: Nested sequences with different lengths. 
+ """ + if isinstance(slist, types.StringType): + return ((), len(slist),) + elif len(slist) == 0: + return ((0,), 0) + else: + maxs = _slistShape0(slist[0]) + sizes = {} + for s in slist: + if isinstance(s, types.StringType): + maxs = max(maxs, ((), len(s))) + sizes[1] = 1 # ignore + else: + maxs = max(maxs, _slistShape0(s)) + sizes[len(s)] = 1 + if len(sizes.keys()) != 1: + raise ValueError("Nested sequences with different lengths.") + return (((len(slist),)+ maxs[0]), maxs[1]) + +def _slistShape(slist, itemsize=None, shape=None): + """_slistShape(slist, itemsize=None, shape=None) computes the "natural" + shape and itemsize of slist, and combines this with the specified + itemsize and shape, checking for consistency. + + Specifying an itemsize overrides the slist's natural itemsize. + + >>> _slistShape(["this","that"], itemsize=10) + ((2,), 10) + >>> _slistShape(["this","that"], itemsize=3) + ((2,), 3) + + Specifying a shape checks for consistency against the slist's shape. + + >>> _slistShape(["this","that"], shape=(2,1,1)) + ((2, 1, 1), 4) + >>> _slistShape(["this","that"], shape=(3,2)) + Traceback (most recent call last): + ... + ValueError: Inconsistent list and shape + + """ + shape_items = _slistShape0(slist) + if shape is None: + shape = shape_items[0] + else: + if _gen.product(shape) != _gen.product(shape_items[0]): + raise ValueError("Inconsistent list and shape") + if itemsize is None: + itemsize = shape_items[-1] + else: + pass # specified itemsize => padded extension or silent truncation. + return (shape, itemsize) + +def _pad(slist, n, c=" "): + """_pad(slist, n, c=' ') pads each member of string list 'slist' with + fill character 'c' to a total length of 'n' characters and returns + the concatenated results. + + strings longer than n are *truncated*. 
+ + >>> + >>> _pad(["this","that","the other"],9," ") + 'this that the other' + """ + if isinstance(slist, types.StringType): + if n > len(slist): + return slist + c*(n-len(slist)) + else: + return slist[:n] + else: + result = [] + for s in slist: + if isinstance(s, types.StringType): + if n > len(s): + t = s + c*(n-len(s)) + else: + t = s[:n] + else: + t = _pad(s, n, c) + result.append(t) + return "".join(result) + +def fromlist(slist, itemsize=None, shape=None, padc=" ", kind=CharArray): + """fromlist(slist, padc=" ") creates a CharArray from a multi-dimensional + list of strings, 'slist', padding each string to the length of the + longest string with character 'padc'. + + >>> s=fromlist([["this","that"],["x","y"]]) + >>> s + CharArray([['this', 'that'], + ['x', 'y']]) + >>> s[0][0] + 'this' + >>> s[1][1] + 'y' + >>> s[1][1] = "whom" + >>> s[1][1] + 'whom' + >>> fromlist(['this', 'that'], itemsize=2) + CharArray(['th', 'th']) + >>> fromlist(['t','u'], itemsize=3) + CharArray(['t', 'u']) + >>> fromlist(['t','u'], itemsize=3)._itemsize + 3 + """ + slist = list(slist) # convert tuples + shape, itemsize = _slistShape(slist, itemsize=itemsize, shape=shape) + s = _pad(slist, itemsize, padc) # compute padded concatenation of slist + return fromstring(s, shape=shape, itemsize=itemsize, padc=padc, kind=kind) + +def _stringToBuffer(datastring): + """_stringToBuffer(datastring) allocates a buffer, copies datastring into + it, and returns the buffer. 
+ """ + abuff = memory.new_memory( len(datastring) ) + memory.writeable_buffer(abuff)[:] = datastring + return abuff + +def fromstring(s, itemsize=None, shape=None, padc=" ", kind=CharArray): + """Create an array from binary data contained in a string (by copying) + >>> fromstring('thisthat', itemsize=4) + CharArray(['this', 'that']) + >>> fromstring('thisthat', shape=2) + CharArray(['this', 'that']) + >>> fromstring('this is a test', shape=(1,)) + CharArray(['this is a test']) + """ + if isinstance(shape, types.IntType): + shape = (shape,) + if ((shape in [None, (1,)]) + and (itemsize is not None + and itemsize > len(s))): + s = _pad(s, itemsize, padc) + if shape is None and not itemsize: + shape = (1,) + itemsize = len(s) + return kind(_stringToBuffer(s), shape=shape, itemsize=itemsize, padc=padc) + + +def fromfile(file, itemsize=None, shape=None, padc=" ", kind=CharArray): + """Create an array from binary file data + + If file is a string then that file is opened, else it is assumed + to be a file object. 
No options at the moment, all file positioning + must be done prior to this function call with a file object + + >>> import testdata + >>> s=fromfile(testdata.filename, shape=-1, itemsize=80) + >>> s[0] + 'SIMPLE = T / file does conform to FITS standard' + >>> s._shape + (108,) + >>> s._itemsize + 80 + """ + if isinstance(shape, types.IntType): + shape = (shape,) + + name = 0 + if isString(file): + name = 1 + file = open(file, 'rb') + size = int(os.path.getsize(file.name) - file.tell()) + + if not shape and not itemsize: + shape = (1,) + itemsize = size + elif shape is not None: + if itemsize is not None: + shapesize = _na.product(shape)*itemsize + if shapesize < 0: + shape = list(shape) + shape[ shape.index(-1) ] = size / -shapesize + shape = tuple(shape) + else: + shapesize=_na.product(shape) + if shapesize < 0: + raise ValueError("Shape dimension of -1 requires itemsize.") + itemsize = size / shapesize + elif itemsize: + shape = (size/itemsize,) + else: + raise ValueError("Must define shape or itemsize.") + + nbytes = _na.product(shape)*itemsize + + if nbytes > size: + raise ValueError( + "Not enough bytes left in file for specified shape and type") + + # create the array + arr = kind(None, shape=shape, itemsize=itemsize, padc=padc) + nbytesread = file.readinto(arr._data) + if nbytesread != nbytes: + raise IOError("Didn't read as many bytes as expected") + if name: + file.close() + return arr + +def array(buffer=None, itemsize=None, shape=None, byteoffset=0, + bytestride=None, padc=" ", kind=CharArray): + """array(buffer=None, itemsize=None, shape=None) creates a new instance + of a CharArray. + + buffer specifies the source of the array's initialization data. + type(buffer) is in [ None, CharArray, StringType, + ListType, FileType, BufferType]. + + itemsize specifies the fixed maximum length of the array's strings. + + shape specifies the array dimensions. 
+ + + >>> array(None,itemsize=3,shape=(1,)) + CharArray([' ']) + >>> array(buffer("abcdsedxxxxxncxn"), itemsize=2, byteoffset=4, bytestride=8) + CharArray(['se', 'nc']) + >>> array("abcd", itemsize=2) + CharArray(['ab', 'cd']) + >>> array(['this', 'that'], itemsize=10) + CharArray(['this', 'that']) + >>> array(['this', 'that'], itemsize=10)._itemsize + 10 + >>> array(array(['this', 'that'], itemsize=10)) + CharArray(['this', 'that']) + >>> import testdata + >>> array(open(testdata.filename,"r"),itemsize=80, shape=2) + CharArray(['SIMPLE = T / file does conform to FITS standard', + 'BITPIX = 16 / number of bits per data pixel']) + """ + if isinstance(shape, types.IntType): + shape = (shape,) + + if buffer is None or _na.SuitableBuffer(buffer): + return kind(buffer, itemsize=itemsize, shape=shape, + byteoffset=byteoffset, bytestride=bytestride, + padc=padc) + + if byteoffset or bytestride is not None: + raise ValueError('Offset and stride can only be specified if "buffer" is a buffer or None') + + if isString(buffer): + return fromstring(buffer, itemsize=itemsize, shape=shape, + padc=padc, kind=kind) + elif isPySequence(buffer): + return fromlist(buffer, itemsize=itemsize, shape=shape, + padc=padc, kind=kind) + elif isinstance(buffer, kind) and buffer.__class__ is kind: + return buffer.copy() + elif isinstance(buffer, RawCharArray): + return kind(buffer=buffer._data, + itemsize=itemsize or buffer._itemsize, + shape=shape or buffer._shape, + padc=padc) + elif isinstance(buffer, types.FileType): + return fromfile(buffer, itemsize=itemsize, shape=shape, + padc=padc, kind=kind) + else: + raise TypeError("Don't know how to handle that kind of buffer") + +def asarray(buffer=None, itemsize=None, shape=None, byteoffset=0, + bytestride=None, padc=" ", kind=CharArray): + """massages a sequence into a chararray. + + If buffer is *already* a chararray of the appropriate kind, it is + returned unaltered. 
+ """ + if isinstance(buffer, kind) and buffer.__class__ is kind: + return buffer + else: + return array(buffer, itemsize, shape, byteoffset, bytestride, + padc, kind) + +inputarray = asarray # obosolete synonym + +def take(array, indices, outarr=None, axis=0, clipmode=_na.RAISE): + a = asarray(array) + return _gen.take(a, indices, outarr, axis, clipmode) +take.__doc__ = _gen.take.__doc__ + +def put(array, indices, values, axis=0, clipmode=_na.RAISE): + a = asarray(array) + v = asarray(values) + return _gen.put(a, indices, v, axis, clipmode) +put.__doc__ = _gen.put.__doc__ + +def _bufferItems(buffer, offset=0, bytestride=None, itemsize=None): + """ + >>> _bufferItems(buffer("0123456789"), offset=2, bytestride=4, itemsize=1) + 2 + >>> _bufferItems(buffer("0123456789"), offset=1, bytestride=2, itemsize=1) + 5 + >>> _bufferItems(buffer("0123456789"), bytestride=5, itemsize=2) + 2 + >>> _bufferItems(buffer("0123456789"), bytestride=None, itemsize=2) + 5 + >>> _bufferItems(buffer("0123456789"), offset=3) + 7 + >>> _bufferItems(buffer("abcdsedxxxxxncxn"),itemsize=2,offset=4,bytestride=8) + 2 + >>> cc=CharArray(buffer('abcdef'*5),shape=(2,),itemsize=2,byteoffset=8,bytestride=20,aligned=0) + >>> cc + CharArray(['cd', 'ef']) + """ + if bytestride is None: + if itemsize is None: + return len(buffer) - offset + else: + if itemsize: + return (len(buffer)-offset)/itemsize + else: + return 1 # Hack to permit 0 length strings + else: + if itemsize is None: + raise ValueError("Must specify itemsize if bytestride is specified.") + strides = (len(buffer)-offset)*1.0/bytestride + istrides = int(strides) + fstrides = int((strides-istrides)*bytestride+0.5) + return istrides + (fstrides >= itemsize) + +def num2char(n, format, itemsize=32): + """num2char formats NumArray 'num' into a CharArray using 'format' + + >>> num2char(_na.arange(0.0,5), '%2.2f') + CharArray(['0.00', '1.00', '2.00', '3.00', '4.00']) + >>> num2char(_na.arange(0.0,5), '%d') + CharArray(['0', '1', '2', '3', '4']) + 
>>> num2char(_na.arange(5), "%02d") + CharArray(['00', '01', '02', '03', '04']) + + Limitations: + + 1. When formatted values are too large to fit into strings of + length itemsize, the values are truncated, possibly losing + significant information. + + 2. Complex numbers are not supported. + + """ + n = _na.asarray(n) + + if isinstance(n.type(), _nt.ComplexType): + raise NotImplementedError("num2char doesn't support complex types yet.") + if n.type() == _na.Float64: + wnum = n + else: + wnum = n.astype(_na.Float64) + char = CharArray(shape=n.getshape(), itemsize=itemsize) + _chararray.Format(format, wnum, char) + return char + +def _nothing(*args): return 0 # Test everything... nothing private. + +def test(): + if sys.version_info < (2,4): + import doctest, strings + return doctest.testmod(strings, isprivate=_nothing) + else: + import numarray.numtest as nt, strings + t = nt.Tester(globs=globals()) + t.rundoc(strings) + return t.summarize() + +if __name__ == "__main__": + test() |