diff options
author | Stefan van der Walt <stefan@sun.ac.za> | 2007-12-15 01:15:26 +0000 |
---|---|---|
committer | Stefan van der Walt <stefan@sun.ac.za> | 2007-12-15 01:15:26 +0000 |
commit | 703e8d6323b19cbfeb96772c1e35f1cd68629336 (patch) | |
tree | 34bd23200d97ff43369d7d23d37c9c08c3d3a3b4 /numpy/ma/extras.py | |
parent | 61f9f6d0fb169cadefe35ea2bdd783848aa771f5 (diff) | |
download | numpy-703e8d6323b19cbfeb96772c1e35f1cd68629336.tar.gz |
Move ma to numpy root. Fix unit tests. Remove references to numpy.core.ma.
Diffstat (limited to 'numpy/ma/extras.py')
-rw-r--r-- | numpy/ma/extras.py | 722 |
1 files changed, 722 insertions, 0 deletions
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py new file mode 100644 index 000000000..f122d6d47 --- /dev/null +++ b/numpy/ma/extras.py @@ -0,0 +1,722 @@ +"""Masked arrays add-ons. + +A collection of utilities for maskedarray + +:author: Pierre Gerard-Marchant +:contact: pierregm_at_uga_dot_edu +:version: $Id: extras.py 3473 2007-10-29 15:18:13Z jarrod.millman $ +""" +__author__ = "Pierre GF Gerard-Marchant ($Author: jarrod.millman $)" +__version__ = '1.0' +__revision__ = "$Revision: 3473 $" +__date__ = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $' + +__all__ = [ +'apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average', +'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack', +'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked', +'dot', 'hsplit', +'mask_rowcols','mask_rows','mask_cols','masked_all','masked_all_like', +'mediff1d', 'mr_', +'notmasked_edges','notmasked_contiguous', +'stdu', 'varu', + ] + +from itertools import groupby + +import core +from core import * + +import numpy +from numpy import float_ +import numpy.core.umath as umath +import numpy.core.numeric as numeric +from numpy.core.numeric import ndarray +from numpy.core.numeric import array as nxarray +from numpy.core.fromnumeric import asarray as nxasarray + +from numpy.lib.index_tricks import concatenator +import numpy.lib.function_base as function_base + +#............................................................................... +def issequence(seq): + """Returns True if the argument is a sequence (ndarray, list or tuple).""" + if isinstance(seq, ndarray): + return True + elif isinstance(seq, tuple): + return True + elif isinstance(seq, list): + return True + return False + +def count_masked(arr, axis=None): + """Counts the number of masked elements along the given axis. + +*Parameters*: + axis : {integer}, optional + Axis along which to count. + If None (default), a flattened version of the array is used. + """ + m = getmaskarray(arr) + return m.sum(axis) + +def masked_all(shape, dtype=float_): + """Returns an empty masked array of the given shape and dtype, + where all the data are masked. + +*Parameters*: + dtype : {dtype}, optional + Data type of the output. + """ + a = masked_array(numeric.empty(shape, dtype), + mask=numeric.ones(shape, bool_)) + return a + +def masked_all_like(arr): + """Returns an empty masked array of the same shape and dtype as the array `a`, + where all the data are masked.""" + a = masked_array(numeric.empty_like(arr), + mask=numeric.ones(arr.shape, bool_)) + return a + +#####-------------------------------------------------------------------------- +#---- --- New methods --- +#####-------------------------------------------------------------------------- +def varu(a, axis=None, dtype=None): + """Returns an unbiased estimate of the variance. + i.e. var = sum((x - x.mean())**2)/(size(x,axis)-1) + +*Parameters*: + axis : {integer}, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + dtype : {dtype}, optional + Datatype for the intermediary computation. If not given, the current dtype + is used instead. + +*Notes*: + The value returned is an unbiased estimate of the true variance. + For the (less standard) biased estimate, use var. + """ + a = asarray(a) + cnt = a.count(axis=axis) + anom = a.anom(axis=axis, dtype=dtype) + anom *= anom + dvar = anom.sum(axis) / (cnt-1) + if axis is None: + return dvar + dvar.__setmask__(mask_or(a._mask.all(axis), (cnt==1))) + return dvar +# return a.__class__(dvar, +# mask=mask_or(a._mask.all(axis), (cnt==1)), +# fill_value=a._fill_value) + +def stdu(a, axis=None, dtype=None): + """Returns an unbiased estimate of the standard deviation. + The standard deviation is the square root of the average of the squared + deviations from the mean, i.e. stdu = sqrt(varu(x)). + +*Parameters*: + axis : {integer}, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + dtype : {dtype}, optional + Datatype for the intermediary computation. + If not given, the current dtype is used instead. + +*Notes*: + The value returned is an unbiased estimate of the true standard deviation. + For the (less standard) biased estimate, use std. + """ + a = asarray(a) + dvar = a.varu(axis,dtype) + if axis is None: + if dvar is masked: + return masked + else: + # Should we use umath.sqrt instead ? + return sqrt(dvar) + return sqrt(dvar) +# return a.__class__(sqrt(dvar._data), mask=dvar._mask, +# fill_value=a._fill_value) + +MaskedArray.stdu = stdu +MaskedArray.varu = varu + +#####-------------------------------------------------------------------------- +#---- --- Standard functions --- +#####-------------------------------------------------------------------------- +class _fromnxfunction: + """Defines a wrapper to adapt numpy functions to masked arrays.""" + def __init__(self, funcname): + self._function = funcname + self.__doc__ = self.getdoc() + def getdoc(self): + "Retrieves the __doc__ string from the function." + return getattr(numpy, self._function).__doc__ +\ + "*Notes*:\n (The function is applied to both the _data and the _mask, if any.)" + def __call__(self, *args, **params): + func = getattr(numpy, self._function) + if len(args)==1: + x = args[0] + if isinstance(x,ndarray): + _d = func(nxasarray(x), **params) + _m = func(getmaskarray(x), **params) + return masked_array(_d, mask=_m) + elif isinstance(x, tuple) or isinstance(x, list): + _d = func(tuple([nxasarray(a) for a in x]), **params) + _m = func(tuple([getmaskarray(a) for a in x]), **params) + return masked_array(_d, mask=_m) + else: + arrays = [] + args = list(args) + while len(args)>0 and issequence(args[0]): + arrays.append(args.pop(0)) + res = [] + for x in arrays: + _d = func(nxasarray(x), *args, **params) + _m = func(getmaskarray(x), *args, **params) + res.append(masked_array(_d, mask=_m)) + return res + +atleast_1d = _fromnxfunction('atleast_1d') +atleast_2d = _fromnxfunction('atleast_2d') +atleast_3d = _fromnxfunction('atleast_3d') + +vstack = row_stack = _fromnxfunction('vstack') +hstack = _fromnxfunction('hstack') +column_stack = _fromnxfunction('column_stack') +dstack = _fromnxfunction('dstack') + +hsplit = _fromnxfunction('hsplit') + +#####-------------------------------------------------------------------------- +#---- +#####-------------------------------------------------------------------------- +def flatten_inplace(seq): + """Flattens a sequence in place.""" + k = 0 + while (k != len(seq)): + while hasattr(seq[k],'__iter__'): + seq[k:(k+1)] = seq[k] + k += 1 + return seq + + +def apply_along_axis(func1d,axis,arr,*args,**kwargs): + """ Execute func1d(arr[i],*args) where func1d takes 1-D arrays + and arr is an N-d array. i varies so as to apply the function + along the given axis for each 1-d subarray in arr. + """ + arr = core.array(arr, copy=False, subok=True) + nd = arr.ndim + if axis < 0: + axis += nd + if (axis >= nd): + raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d." + % (axis,nd)) + ind = [0]*(nd-1) + i = numeric.zeros(nd,'O') + indlist = range(nd) + indlist.remove(axis) + i[axis] = slice(None,None) + outshape = numeric.asarray(arr.shape).take(indlist) + i.put(indlist, ind) + j = i.copy() + res = func1d(arr[tuple(i.tolist())],*args,**kwargs) + # if res is a number, then we have a smaller output array + asscalar = numeric.isscalar(res) + if not asscalar: + try: + len(res) + except TypeError: + asscalar = True + # Note: we shouldn't set the dtype of the output from the first result... + #...so we force the type to object, and build a list of dtypes + #...we'll just take the largest, to avoid some downcasting + dtypes = [] + if asscalar: + dtypes.append(numeric.asarray(res).dtype) + outarr = zeros(outshape, object_) + outarr[tuple(ind)] = res + Ntot = numeric.product(outshape) + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= outshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist,ind) + res = func1d(arr[tuple(i.tolist())],*args,**kwargs) + outarr[tuple(ind)] = res + dtypes.append(asarray(res).dtype) + k += 1 + else: + res = core.array(res, copy=False, subok=True) + j = i.copy() + j[axis] = ([slice(None,None)] * res.ndim) + j.put(indlist, ind) + Ntot = numeric.product(outshape) + holdshape = outshape + outshape = list(arr.shape) + outshape[axis] = res.shape + dtypes.append(asarray(res).dtype) + outshape = flatten_inplace(outshape) + outarr = zeros(outshape, object_) + outarr[tuple(flatten_inplace(j.tolist()))] = res + k = 1 + while k < Ntot: + # increment the index + ind[-1] += 1 + n = -1 + while (ind[n] >= holdshape[n]) and (n > (1-nd)): + ind[n-1] += 1 + ind[n] = 0 + n -= 1 + i.put(indlist, ind) + j.put(indlist, ind) + res = func1d(arr[tuple(i.tolist())],*args,**kwargs) + outarr[tuple(flatten_inplace(j.tolist()))] = res + dtypes.append(asarray(res).dtype) + k += 1 + max_dtypes = numeric.dtype(numeric.asarray(dtypes).max()) + if not hasattr(arr, '_mask'): + result = numeric.asarray(outarr, dtype=max_dtypes) + else: + result = core.asarray(outarr, dtype=max_dtypes) + result.fill_value = core.default_fill_value(result) + return result + +def average (a, axis=None, weights=None, returned=False): + """Averages the array over the given axis. + +*Parameters*: + axis : {integer}, optional + Axis along which to perform the operation. + If None, applies to a flattened version of the array. + weights : {sequence}, optional + Sequence of weights. + The weights must have the shape of a, or be 1D with length the size of a + along the given axis. + If no weights are given, weights are assumed to be 1. + returned : {boolean} + Flag indicating whether a tuple (result, sum of weights/counts) should be + returned as output (True), or just the result (False). + """ + a = asarray(a) + mask = a.mask + ash = a.shape + if ash == (): + ash = (1,) + if axis is None: + if mask is nomask: + if weights is None: + n = a.sum(axis=None) + d = float(a.size) + else: + w = filled(weights, 0.0).ravel() + n = umath.add.reduce(a._data.ravel() * w) + d = umath.add.reduce(w) + del w + else: + if weights is None: + n = a.filled(0).sum(axis=None) + d = umath.add.reduce((-mask).ravel().astype(int_)) + else: + w = array(filled(weights, 0.0), float, mask=mask).ravel() + n = add.reduce(a.ravel() * w) + d = add.reduce(w) + del w + else: + if mask is nomask: + if weights is None: + d = ash[axis] * 1.0 + n = add.reduce(a._data, axis, dtype=float_) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = numeric.array(w, float_, copy=0) + n = add.reduce(a*w, axis) + d = add.reduce(w, axis) + del w + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)") + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) + del w, r + else: + raise ValueError, 'average: weights wrong shape.' + else: + if weights is None: + n = add.reduce(a, axis, dtype=float_) + d = umath.add.reduce((-mask), axis=axis, dtype=float_) + else: + w = filled(weights, 0.0) + wsh = w.shape + if wsh == (): + wsh = (1,) + if wsh == ash: + w = array(w, dtype=float_, mask=mask, copy=0) + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) + elif wsh == (ash[axis],): + ni = ash[axis] + r = [None]*len(ash) + r[axis] = slice(None, None, 1) + w = eval ("w["+ repr(tuple(r)) + "] * masked_array(ones(ash, float), mask)") + n = add.reduce(a*w, axis, dtype=float_) + d = add.reduce(w, axis, dtype=float_) + else: + raise ValueError, 'average: weights wrong shape.' + del w + if n is masked or d is masked: + return masked + result = n/d + del n + + if isMaskedArray(result): + if ((axis is None) or (axis==0 and a.ndim == 1)) and \ + (result.mask is nomask): + result = result._data + if returned: + if not isMaskedArray(d): + d = masked_array(d) + if isinstance(d, ndarray) and (not d.shape == result.shape): + d = ones(result.shape, dtype=float_) * d + if returned: + return result, d + else: + return result + +#.............................................................................. +def compress_rowcols(x, axis=None): + """Suppresses the rows and/or columns of a 2D array that contains masked values. + + The suppression behavior is selected with the `axis`parameter. + - If axis is None, rows and columns are suppressed. + - If axis is 0, only rows are suppressed. + - If axis is 1 or -1, only columns are suppressed. + +*Returns*: + compressed_array : a ndarray. + """ + x = asarray(x) + if x.ndim != 2: + raise NotImplementedError, "compress2d works for 2D arrays only." + m = getmask(x) + # Nothing is masked: return x + if m is nomask or not m.any(): + return x._data + # All is masked: return empty + if m.all(): + return nxarray([]) + # Builds a list of rows/columns indices + (idxr, idxc) = (range(len(x)), range(x.shape[1])) + masked = m.nonzero() + if not axis: + for i in function_base.unique(masked[0]): + idxr.remove(i) + if axis in [None, 1, -1]: + for j in function_base.unique(masked[1]): + idxc.remove(j) + return x._data[idxr][:,idxc] + +def compress_rows(a): + """Suppresses whole rows of a 2D array that contain masked values.""" + return compress_rowcols(a,0) + +def compress_cols(a): + """Suppresses whole columnss of a 2D array that contain masked values.""" + return compress_rowcols(a,1) + +def mask_rowcols(a, axis=None): + """Masks whole rows and/or columns of a 2D array that contain masked values. + The masking behavior is selected with the `axis`parameter. + - If axis is None, rows and columns are masked. + - If axis is 0, only rows are masked. + - If axis is 1 or -1, only columns are masked. + Returns a *pure* ndarray. + """ + a = asarray(a) + if a.ndim != 2: + raise NotImplementedError, "compress2d works for 2D arrays only." + m = getmask(a) + # Nothing is masked: return a + if m is nomask or not m.any(): + return a + maskedval = m.nonzero() + a._mask = a._mask.copy() + if not axis: + a[function_base.unique(maskedval[0])] = masked + if axis in [None, 1, -1]: + a[:,function_base.unique(maskedval[1])] = masked + return a + +def mask_rows(a, axis=None): + """Masks whole rows of a 2D array that contain masked values.""" + return mask_rowcols(a, 0) + +def mask_cols(a, axis=None): + """Masks whole columns of a 2D array that contain masked values.""" + return mask_rowcols(a, 1) + + +def dot(a,b, strict=False): + """Returns the dot product of two 2D masked arrays a and b. + + Like the generic numpy equivalent, the product sum is over the last dimension + of a and the second-to-last dimension of b. + If strict is True, masked values are propagated: if a masked value appears + in a row or column, the whole row or column is considered masked. + +*Parameters*: + strict : {boolean} + Whether masked data are propagated (True) or set to 0 for the computation. + +*Note*: + The first argument is not conjugated. + """ + #TODO: Works only with 2D arrays. There should be a way to get it to run with higher dimension + if strict and (a.ndim == 2) and (b.ndim == 2): + a = mask_rows(a) + b = mask_cols(b) + # + d = numpy.dot(filled(a, 0), filled(b, 0)) + # + am = (~getmaskarray(a)) + bm = (~getmaskarray(b)) + m = ~numpy.dot(am,bm) + return masked_array(d, mask=m) + +#............................................................................... +def mediff1d(array, to_end=None, to_begin=None): + """Returns the differences between consecutive elements of an array, possibly with + prefixed and/or appended values. + +*Parameters*: + array : {array} + Input array, will be flattened before the difference is taken. + to_end : {number}, optional + If provided, this number will be tacked onto the end of the returned + differences. + to_begin : {number}, optional + If provided, this number will be taked onto the beginning of the + returned differences. + +*Returns*: + ed : {array} + The differences. Loosely, this will be (ary[1:] - ary[:-1]). + """ + a = masked_array(array, copy=True) + if a.ndim > 1: + a.reshape((a.size,)) + (d, m, n) = (a._data, a._mask, a.size-1) + dd = d[1:]-d[:-1] + if m is nomask: + dm = nomask + else: + dm = m[1:]-m[:-1] + # + if to_end is not None: + to_end = asarray(to_end) + nend = to_end.size + if to_begin is not None: + to_begin = asarray(to_begin) + nbegin = to_begin.size + r_data = numeric.empty((n+nend+nbegin,), dtype=a.dtype) + r_mask = numeric.zeros((n+nend+nbegin,), dtype=bool_) + r_data[:nbegin] = to_begin._data + r_mask[:nbegin] = to_begin._mask + r_data[nbegin:-nend] = dd + r_mask[nbegin:-nend] = dm + else: + r_data = numeric.empty((n+nend,), dtype=a.dtype) + r_mask = numeric.zeros((n+nend,), dtype=bool_) + r_data[:-nend] = dd + r_mask[:-nend] = dm + r_data[-nend:] = to_end._data + r_mask[-nend:] = to_end._mask + # + elif to_begin is not None: + to_begin = asarray(to_begin) + nbegin = to_begin.size + r_data = numeric.empty((n+nbegin,), dtype=a.dtype) + r_mask = numeric.zeros((n+nbegin,), dtype=bool_) + r_data[:nbegin] = to_begin._data + r_mask[:nbegin] = to_begin._mask + r_data[nbegin:] = dd + r_mask[nbegin:] = dm + # + else: + r_data = dd + r_mask = dm + return masked_array(r_data, mask=r_mask) + + + + +#####-------------------------------------------------------------------------- +#---- --- Concatenation helpers --- +#####-------------------------------------------------------------------------- + +class mconcatenator(concatenator): + """Translates slice objects to concatenation along an axis.""" + + def __init__(self, axis=0): + concatenator.__init__(self, axis, matrix=False) + + def __getitem__(self,key): + if isinstance(key, str): + raise MAError, "Unavailable for masked array." + if type(key) is not tuple: + key = (key,) + objs = [] + scalars = [] + final_dtypedescr = None + for k in range(len(key)): + scalar = False + if type(key[k]) is slice: + step = key[k].step + start = key[k].start + stop = key[k].stop + if start is None: + start = 0 + if step is None: + step = 1 + if type(step) is type(1j): + size = int(abs(step)) + newobj = function_base.linspace(start, stop, num=size) + else: + newobj = numeric.arange(start, stop, step) + elif type(key[k]) is str: + if (key[k] in 'rc'): + self.matrix = True + self.col = (key[k] == 'c') + continue + try: + self.axis = int(key[k]) + continue + except (ValueError, TypeError): + raise ValueError, "Unknown special directive" + elif type(key[k]) in numeric.ScalarType: + newobj = asarray([key[k]]) + scalars.append(k) + scalar = True + else: + newobj = key[k] + objs.append(newobj) + if isinstance(newobj, numeric.ndarray) and not scalar: + if final_dtypedescr is None: + final_dtypedescr = newobj.dtype + elif newobj.dtype > final_dtypedescr: + final_dtypedescr = newobj.dtype + if final_dtypedescr is not None: + for k in scalars: + objs[k] = objs[k].astype(final_dtypedescr) + res = concatenate(tuple(objs),axis=self.axis) + return self._retval(res) + +class mr_class(mconcatenator): + """Translates slice objects to concatenation along the first axis. + + For example: + >>> mr_[array([1,2,3]), 0, 0, array([4,5,6])] + array([1, 2, 3, 0, 0, 4, 5, 6]) + """ + def __init__(self): + mconcatenator.__init__(self, 0) + +mr_ = mr_class() + +#####-------------------------------------------------------------------------- +#---- --- +#####-------------------------------------------------------------------------- + +def flatnotmasked_edges(a): + """Finds the indices of the first and last not masked values in a 1D masked array. + If all values are masked, returns None. + """ + m = getmask(a) + if m is nomask or not numpy.any(m): + return [0,-1] + unmasked = numeric.flatnonzero(~m) + if len(unmasked) > 0: + return unmasked[[0,-1]] + else: + return None + +def notmasked_edges(a, axis=None): + """Finds the indices of the first and last not masked values along the given + axis in a masked array. + If all values are masked, returns None. + Otherwise, returns a list of 2 tuples, corresponding to the indices of the + first and last unmasked values respectively. + """ + a = asarray(a) + if axis is None or a.ndim == 1: + return flatnotmasked_edges(a) + m = getmask(a) + idx = array(numpy.indices(a.shape), mask=nxasarray([m]*a.ndim)) + return [tuple([idx[i].min(axis).compressed() for i in range(a.ndim)]), + tuple([idx[i].max(axis).compressed() for i in range(a.ndim)]),] + +def flatnotmasked_contiguous(a): + """Finds contiguous unmasked data in a flattened masked array. + Returns a sorted sequence of slices (start index, end index). + """ + m = getmask(a) + if m is nomask: + return (a.size, [0,-1]) + unmasked = numeric.flatnonzero(~m) + if len(unmasked) == 0: + return None + result = [] + for k, group in groupby(enumerate(unmasked), lambda (i,x):i-x): + tmp = numpy.fromiter((g[1] for g in group), int_) +# result.append((tmp.size, tuple(tmp[[0,-1]]))) + result.append( slice(tmp[0],tmp[-1]) ) + result.sort() + return result + +def notmasked_contiguous(a, axis=None): + """Finds contiguous unmasked data in a masked array along the given axis. + Returns a sorted sequence of slices (start index, end index). + Note: Only accepts 2D arrays at most. + """ + a = asarray(a) + nd = a.ndim + if nd > 2: + raise NotImplementedError,"Currently limited to atmost 2D array." + if axis is None or nd == 1: + return flatnotmasked_contiguous(a) + # + result = [] + # + other = (axis+1)%2 + idx = [0,0] + idx[axis] = slice(None,None) + # + for i in range(a.shape[other]): + idx[other] = i + result.append( flatnotmasked_contiguous(a[idx]) ) + return result + +################################################################################ +if __name__ == '__main__': + # + import numpy as N + from maskedarray.testutils import assert_equal + if 1: + b = ones(5) + m = [1,0,0,0,0] + d = masked_array(b,mask=m) + c = mr_[d,0,0,d] |