"""Masked arrays add-ons.

A collection of utilities for maskedarray

:author: Pierre Gerard-Marchant
:contact: pierregm_at_uga_dot_edu
:version: $Id: extras.py 3473 2007-10-29 15:18:13Z jarrod.millman $
"""
__author__ = "Pierre GF Gerard-Marchant ($Author: jarrod.millman $)"
__version__ = '1.0'
__revision__ = "$Revision: 3473 $"
__date__     = '$Date: 2007-10-29 17:18:13 +0200 (Mon, 29 Oct 2007) $'

__all__ = [
'apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d', 'average',
'vstack', 'hstack', 'dstack', 'row_stack', 'column_stack',
'compress_rowcols', 'compress_rows', 'compress_cols', 'count_masked',
'dot', 'hsplit',
'mask_rowcols','mask_rows','mask_cols','masked_all','masked_all_like',
'mediff1d', 'mr_',
'notmasked_edges','notmasked_contiguous',
'stdu', 'varu',
           ]

from itertools import groupby

import core
from core import *

import numpy
from numpy import float_
import numpy.core.umath as umath
import numpy.core.numeric as numeric
from numpy.core.numeric import ndarray
from numpy.core.numeric import array as nxarray
from numpy.core.fromnumeric import asarray as nxasarray

from numpy.lib.index_tricks import AxisConcatenator
import numpy.lib.function_base as function_base

#...............................................................................
def issequence(seq):
    """Is seq a sequence (ndarray, list or tuple)?"""
    if isinstance(seq, ndarray):
        return True
    elif isinstance(seq, tuple):
        return True
    elif isinstance(seq, list):
        return True
    return False

def count_masked(arr, axis=None):
    """Count the number of masked elements along the given axis.

    Parameters
    ----------
        axis : int, optional
            Axis along which to count.
            If None (default), a flattened version of the array is used.

    """
    m = getmaskarray(arr)
    return m.sum(axis)

def masked_all(shape, dtype=float_):
    """Return an empty masked array of the given shape and dtype,
    where all the data are masked.

    Parameters
    ----------
        dtype : dtype, optional
            Data type of the output.

    """
    a = masked_array(numeric.empty(shape, dtype),
                     mask=numeric.ones(shape, bool_))
    return a

def masked_all_like(arr):
    """Return an empty masked array of the same shape and dtype as
    the array `a`, where all the data are masked.

    """
    a = masked_array(numeric.empty_like(arr),
                     mask=numeric.ones(arr.shape, bool_))
    return a

#####--------------------------------------------------------------------------
#---- --- New methods ---
#####--------------------------------------------------------------------------
def varu(a, axis=None, dtype=None):
    """Return an unbiased estimate of the variance.
    i.e. var = sum((x - x.mean())**2)/(size(x,axis)-1)

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
        dtype : {dtype}, optional
            Datatype for the intermediary computation. If not given,
            the current dtype is used instead.

    Notes
    -----
        The value returned is an unbiased estimate of the true variance.
        For the (less standard) biased estimate, use var.

    """
    a = asarray(a)
    cnt = a.count(axis=axis)
    anom = a.anom(axis=axis, dtype=dtype)
    anom *= anom
    dvar = anom.sum(axis) / (cnt-1)
    if axis is None:
        return dvar
    dvar.__setmask__(mask_or(a._mask.all(axis), (cnt==1)))
    return dvar
#    return a.__class__(dvar,
#                          mask=mask_or(a._mask.all(axis), (cnt==1)),
#                          fill_value=a._fill_value)

def stdu(a, axis=None, dtype=None):
    """Return an unbiased estimate of the standard deviation.  The
    standard deviation is the square root of the average of the
    squared deviations from the mean, i.e. stdu = sqrt(varu(x)).

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
        dtype : dtype, optional
            Datatype for the intermediary computation.
            If not given, the current dtype is used instead.

    Notes
    -----
        The value returned is an unbiased estimate of the true
        standard deviation.  For the biased estimate,
        use std.

    """
    a = asarray(a)
    dvar = a.varu(axis,dtype)
    if axis is None:
        if dvar is masked:
            return masked
        else:
            # Should we use umath.sqrt instead ?
            return sqrt(dvar)
    return sqrt(dvar)


MaskedArray.stdu = stdu
MaskedArray.varu = varu

#####--------------------------------------------------------------------------
#---- --- Standard functions ---
#####--------------------------------------------------------------------------
class _fromnxfunction:
    """Defines a wrapper to adapt numpy functions to masked arrays."""
    def __init__(self, funcname):
        self._function = funcname
        self.__doc__ = self.getdoc()
    def getdoc(self):
        "Retrieves the __doc__ string from the function."
        return getattr(numpy, self._function).__doc__ +\
            "*Notes*:\n    (The function is applied to both the _data and the _mask, if any.)"
    def __call__(self, *args, **params):
        func = getattr(numpy, self._function)
        if len(args)==1:
            x = args[0]
            if isinstance(x,ndarray):
                _d = func(nxasarray(x), **params)
                _m = func(getmaskarray(x), **params)
                return masked_array(_d, mask=_m)
            elif isinstance(x, tuple) or isinstance(x, list):
                _d = func(tuple([nxasarray(a) for a in x]), **params)
                _m = func(tuple([getmaskarray(a) for a in x]), **params)
                return masked_array(_d, mask=_m)
        else:
            arrays = []
            args = list(args)
            while len(args)>0 and issequence(args[0]):
                arrays.append(args.pop(0))
            res = []
            for x in arrays:
                _d = func(nxasarray(x), *args, **params)
                _m = func(getmaskarray(x), *args, **params)
                res.append(masked_array(_d, mask=_m))
            return res

atleast_1d = _fromnxfunction('atleast_1d')
atleast_2d = _fromnxfunction('atleast_2d')
atleast_3d = _fromnxfunction('atleast_3d')

vstack = row_stack = _fromnxfunction('vstack')
hstack = _fromnxfunction('hstack')
column_stack = _fromnxfunction('column_stack')
dstack = _fromnxfunction('dstack')

hsplit = _fromnxfunction('hsplit')

#####--------------------------------------------------------------------------
#----
#####--------------------------------------------------------------------------
def flatten_inplace(seq):
    """Flatten a sequence in place."""
    k = 0
    while (k != len(seq)):
        while hasattr(seq[k],'__iter__'):
            seq[k:(k+1)] = seq[k]
        k += 1
    return seq


def apply_along_axis(func1d,axis,arr,*args,**kwargs):
    """Execute func1d(arr[i],*args) where func1d takes 1-D arrays and
    arr is an N-d array.  i varies so as to apply the function along
    the given axis for each 1-d subarray in arr.
    """
    arr = core.array(arr, copy=False, subok=True)
    nd = arr.ndim
    if axis < 0:
        axis += nd
    if (axis >= nd):
        raise ValueError("axis must be less than arr.ndim; axis=%d, rank=%d."
            % (axis,nd))
    ind = [0]*(nd-1)
    i = numeric.zeros(nd,'O')
    indlist = range(nd)
    indlist.remove(axis)
    i[axis] = slice(None,None)
    outshape = numeric.asarray(arr.shape).take(indlist)
    i.put(indlist, ind)
    j = i.copy()
    res = func1d(arr[tuple(i.tolist())],*args,**kwargs)
    #  if res is a number, then we have a smaller output array
    asscalar = numeric.isscalar(res)
    if not asscalar:
        try:
            len(res)
        except TypeError:
            asscalar = True
    # Note: we shouldn't set the dtype of the output from the first result...
    #...so we force the type to object, and build a list of dtypes
    #...we'll just take the largest, to avoid some downcasting
    dtypes = []
    if asscalar:
        dtypes.append(numeric.asarray(res).dtype)
        outarr = zeros(outshape, object_)
        outarr[tuple(ind)] = res
        Ntot = numeric.product(outshape)
        k = 1
        while k < Ntot:
            # increment the index
            ind[-1] += 1
            n = -1
            while (ind[n] >= outshape[n]) and (n > (1-nd)):
                ind[n-1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist,ind)
            res = func1d(arr[tuple(i.tolist())],*args,**kwargs)
            outarr[tuple(ind)] = res
            dtypes.append(asarray(res).dtype)
            k += 1
    else:
        res = core.array(res, copy=False, subok=True)
        j = i.copy()
        j[axis] = ([slice(None,None)] * res.ndim)
        j.put(indlist, ind)
        Ntot = numeric.product(outshape)
        holdshape = outshape
        outshape = list(arr.shape)
        outshape[axis] = res.shape
        dtypes.append(asarray(res).dtype)
        outshape = flatten_inplace(outshape)
        outarr = zeros(outshape, object_)
        outarr[tuple(flatten_inplace(j.tolist()))] = res
        k = 1
        while k < Ntot:
            # increment the index
            ind[-1] += 1
            n = -1
            while (ind[n] >= holdshape[n]) and (n > (1-nd)):
                ind[n-1] += 1
                ind[n] = 0
                n -= 1
            i.put(indlist, ind)
            j.put(indlist, ind)
            res = func1d(arr[tuple(i.tolist())],*args,**kwargs)
            outarr[tuple(flatten_inplace(j.tolist()))] = res
            dtypes.append(asarray(res).dtype)
            k += 1
    max_dtypes = numeric.dtype(numeric.asarray(dtypes).max())
    if not hasattr(arr, '_mask'):
        result = numeric.asarray(outarr, dtype=max_dtypes)
    else:
        result = core.asarray(outarr, dtype=max_dtypes)
        result.fill_value = core.default_fill_value(result)
    return result

def average(a, axis=None, weights=None, returned=False):
    """Average the array over the given axis.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
        weights : sequence, optional
            Sequence of weights.
            The weights must have the shape of a, or be 1D with length
            the size of a along the given axis.
            If no weights are given, weights are assumed to be 1.
        returned : bool
            Flag indicating whether a tuple (result, sum of weights/counts) 
            should be returned as output (True), or just the result (False).

    """
    a = asarray(a)
    mask = a.mask
    ash = a.shape
    if ash == ():
        ash = (1,)
    if axis is None:
        if mask is nomask:
            if weights is None:
                n = a.sum(axis=None)
                d = float(a.size)
            else:
                w = filled(weights, 0.0).ravel()
                n = umath.add.reduce(a._data.ravel() * w)
                d = umath.add.reduce(w)
                del w
        else:
            if weights is None:
                n = a.filled(0).sum(axis=None)
                d = umath.add.reduce((-mask).ravel().astype(int_))
            else:
                w = array(filled(weights, 0.0), float, mask=mask).ravel()
                n = add.reduce(a.ravel() * w)
                d = add.reduce(w)
                del w
    else:
        if mask is nomask:
            if weights is None:
                d = ash[axis] * 1.0
                n = add.reduce(a._data, axis, dtype=float_)
            else:
                w = filled(weights, 0.0)
                wsh = w.shape
                if wsh == ():
                    wsh = (1,)
                if wsh == ash:
                    w = numeric.array(w, float_, copy=0)
                    n = add.reduce(a*w, axis)
                    d = add.reduce(w, axis)
                    del w
                elif wsh == (ash[axis],):
                    ni = ash[axis]
                    r = [None]*len(ash)
                    r[axis] = slice(None, None, 1)
                    w = eval ("w["+ repr(tuple(r)) + "] * ones(ash, float)")
                    n = add.reduce(a*w, axis, dtype=float_)
                    d = add.reduce(w, axis, dtype=float_)
                    del w, r
                else:
                    raise ValueError, 'average: weights wrong shape.'
        else:
            if weights is None:
                n = add.reduce(a, axis, dtype=float_)
                d = umath.add.reduce((-mask), axis=axis, dtype=float_)
            else:
                w = filled(weights, 0.0)
                wsh = w.shape
                if wsh == ():
                    wsh = (1,)
                if wsh == ash:
                    w = array(w, dtype=float_, mask=mask, copy=0)
                    n = add.reduce(a*w, axis, dtype=float_)
                    d = add.reduce(w, axis, dtype=float_)
                elif wsh == (ash[axis],):
                    ni = ash[axis]
                    r = [None]*len(ash)
                    r[axis] = slice(None, None, 1)
                    w = eval ("w["+ repr(tuple(r)) + \
                              "] * masked_array(ones(ash, float), mask)")
                    n = add.reduce(a*w, axis, dtype=float_)
                    d = add.reduce(w, axis, dtype=float_)
                else:
                    raise ValueError, 'average: weights wrong shape.'
                del w
    if n is masked or d is masked:
        return masked
    result = n/d
    del n

    if isMaskedArray(result):
        if ((axis is None) or (axis==0 and a.ndim == 1)) and \
           (result.mask is nomask):
            result = result._data
        if returned:
            if not isMaskedArray(d):
                d = masked_array(d)
            if isinstance(d, ndarray) and (not d.shape == result.shape):
                d = ones(result.shape, dtype=float_) * d
    if returned:
        return result, d
    else:
        return result

#..............................................................................
def compress_rowcols(x, axis=None):
    """Suppress the rows and/or columns of a 2D array that contains
    masked values.

    The suppression behavior is selected with the `axis`parameter.
        - If axis is None, rows and columns are suppressed.
        - If axis is 0, only rows are suppressed.
        - If axis is 1 or -1, only columns are suppressed.
        
    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.

    Returns
    -------
        compressed_array : an ndarray.

    """
    x = asarray(x)
    if x.ndim != 2:
        raise NotImplementedError, "compress2d works for 2D arrays only."
    m = getmask(x)
    # Nothing is masked: return x
    if m is nomask or not m.any():
        return x._data
    # All is masked: return empty
    if m.all():
        return nxarray([])
    # Builds a list of rows/columns indices
    (idxr, idxc) = (range(len(x)), range(x.shape[1]))
    masked = m.nonzero()
    if not axis:
        for i in function_base.unique(masked[0]):
            idxr.remove(i)
    if axis in [None, 1, -1]:
        for j in function_base.unique(masked[1]):
            idxc.remove(j)
    return x._data[idxr][:,idxc]

def compress_rows(a):
    """Suppress whole rows of a 2D array that contain masked values.

    """
    return compress_rowcols(a,0)

def compress_cols(a):
    """Suppress whole columnss of a 2D array that contain masked values.

    """
    return compress_rowcols(a,1)

def mask_rowcols(a, axis=None):
    """Mask whole rows and/or columns of a 2D array that contain
    masked values.  The masking behavior is selected with the
    `axis`parameter.

        - If axis is None, rows and columns are masked.
        - If axis is 0, only rows are masked.
        - If axis is 1 or -1, only columns are masked.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
            
    Returns
    -------
         a *pure* ndarray.

    """
    a = asarray(a)
    if a.ndim != 2:
        raise NotImplementedError, "compress2d works for 2D arrays only."
    m = getmask(a)
    # Nothing is masked: return a
    if m is nomask or not m.any():
        return a
    maskedval = m.nonzero()
    a._mask = a._mask.copy()
    if not axis:
        a[function_base.unique(maskedval[0])] = masked
    if axis in [None, 1, -1]:
        a[:,function_base.unique(maskedval[1])] = masked
    return a

def mask_rows(a, axis=None):
    """Mask whole rows of a 2D array that contain masked values.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
    """
    return mask_rowcols(a, 0)

def mask_cols(a, axis=None):
    """Mask whole columns of a 2D array that contain masked values.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
    """
    return mask_rowcols(a, 1)


def dot(a,b, strict=False):
    """Return the dot product of two 2D masked arrays a and b.

    Like the generic numpy equivalent, the product sum is over the
    last dimension of a and the second-to-last dimension of b.  If
    strict is True, masked values are propagated: if a masked value
    appears in a row or column, the whole row or column is considered
    masked.

    Parameters
    ----------
        strict : {boolean}
            Whether masked data are propagated (True) or set to 0 for
            the computation.

    Notes
    -----
        The first argument is not conjugated.

    """
    #TODO: Works only with 2D arrays. There should be a way to get it to run with higher dimension
    if strict and (a.ndim == 2) and (b.ndim == 2):
        a = mask_rows(a)
        b = mask_cols(b)
    #
    d = numpy.dot(filled(a, 0), filled(b, 0))
    #
    am = (~getmaskarray(a))
    bm = (~getmaskarray(b))
    m = ~numpy.dot(am,bm)
    return masked_array(d, mask=m)

#...............................................................................
def mediff1d(array, to_end=None, to_begin=None):
    """Return the differences between consecutive elements of an
    array, possibly with prefixed and/or appended values.

    Parameters
    ----------
        array : {array}
            Input array,  will be flattened before the difference is taken.
        to_end : {number}, optional
            If provided, this number will be tacked onto the end of the returned
            differences.
        to_begin : {number}, optional
            If provided, this number will be taked onto the beginning of the
            returned differences.

    Returns
    -------
          ed : {array}
            The differences. Loosely, this will be (ary[1:] - ary[:-1]).

    """
    a = masked_array(array, copy=True)
    if a.ndim > 1:
        a.reshape((a.size,))
    (d, m, n) = (a._data, a._mask, a.size-1)
    dd = d[1:]-d[:-1]
    if m is nomask:
        dm = nomask
    else:
        dm = m[1:]-m[:-1]
    #
    if to_end is not None:
        to_end = asarray(to_end)
        nend = to_end.size
        if to_begin is not None:
            to_begin = asarray(to_begin)
            nbegin = to_begin.size
            r_data = numeric.empty((n+nend+nbegin,), dtype=a.dtype)
            r_mask = numeric.zeros((n+nend+nbegin,), dtype=bool_)
            r_data[:nbegin] = to_begin._data
            r_mask[:nbegin] = to_begin._mask
            r_data[nbegin:-nend] = dd
            r_mask[nbegin:-nend] = dm
        else:
            r_data = numeric.empty((n+nend,), dtype=a.dtype)
            r_mask = numeric.zeros((n+nend,), dtype=bool_)
            r_data[:-nend] = dd
            r_mask[:-nend] = dm
        r_data[-nend:] = to_end._data
        r_mask[-nend:] = to_end._mask
    #
    elif to_begin is not None:
        to_begin = asarray(to_begin)
        nbegin = to_begin.size
        r_data = numeric.empty((n+nbegin,), dtype=a.dtype)
        r_mask = numeric.zeros((n+nbegin,), dtype=bool_)
        r_data[:nbegin] = to_begin._data
        r_mask[:nbegin] = to_begin._mask
        r_data[nbegin:] = dd
        r_mask[nbegin:] = dm
    #
    else:
        r_data = dd
        r_mask = dm
    return masked_array(r_data, mask=r_mask)


#####--------------------------------------------------------------------------
#---- --- Concatenation helpers ---
#####--------------------------------------------------------------------------

class MAxisConcatenator(AxisConcatenator):
    """Translate slice objects to concatenation along an axis.

    """

    def __init__(self, axis=0):
        AxisConcatenator.__init__(self, axis, matrix=False)

    def __getitem__(self,key):
        if isinstance(key, str):
            raise MAError, "Unavailable for masked array."
        if type(key) is not tuple:
            key = (key,)
        objs = []
        scalars = []
        final_dtypedescr = None
        for k in range(len(key)):
            scalar = False
            if type(key[k]) is slice:
                step = key[k].step
                start = key[k].start
                stop = key[k].stop
                if start is None:
                    start = 0
                if step is None:
                    step = 1
                if type(step) is type(1j):
                    size = int(abs(step))
                    newobj = function_base.linspace(start, stop, num=size)
                else:
                    newobj = numeric.arange(start, stop, step)
            elif type(key[k]) is str:
                if (key[k] in 'rc'):
                    self.matrix = True
                    self.col = (key[k] == 'c')
                    continue
                try:
                    self.axis = int(key[k])
                    continue
                except (ValueError, TypeError):
                    raise ValueError, "Unknown special directive"
            elif type(key[k]) in numeric.ScalarType:
                newobj = asarray([key[k]])
                scalars.append(k)
                scalar = True
            else:
                newobj = key[k]
            objs.append(newobj)
            if isinstance(newobj, numeric.ndarray) and not scalar:
                if final_dtypedescr is None:
                    final_dtypedescr = newobj.dtype
                elif newobj.dtype > final_dtypedescr:
                    final_dtypedescr = newobj.dtype
        if final_dtypedescr is not None:
            for k in scalars:
                objs[k] = objs[k].astype(final_dtypedescr)
        res = concatenate(tuple(objs),axis=self.axis)
        return self._retval(res)

class mr_class(MAxisConcatenator):
    """Translate slice objects to concatenation along the first axis.

    For example:
        >>> mr_[array([1,2,3]), 0, 0, array([4,5,6])]
        array([1, 2, 3, 0, 0, 4, 5, 6])

    """
    def __init__(self):
        MAxisConcatenator.__init__(self, 0)

mr_ = mr_class()

#####--------------------------------------------------------------------------
#---- ---
#####--------------------------------------------------------------------------

def flatnotmasked_edges(a):
    """Find the indices of the first and last not masked values in a
    1D masked array.  If all values are masked, returns None.

    """
    m = getmask(a)
    if m is nomask or not numpy.any(m):
        return [0,-1]
    unmasked = numeric.flatnonzero(~m)
    if len(unmasked) > 0:
        return unmasked[[0,-1]]
    else:
        return None

def notmasked_edges(a, axis=None):
    """Find the indices of the first and last not masked values along
    the given axis in a masked array.

    If all values are masked, return None.  Otherwise, return a list
    of 2 tuples, corresponding to the indices of the first and last
    unmasked values respectively.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
    """
    a = asarray(a)
    if axis is None or a.ndim == 1:
        return flatnotmasked_edges(a)
    m = getmask(a)
    idx = array(numpy.indices(a.shape), mask=nxasarray([m]*a.ndim))
    return [tuple([idx[i].min(axis).compressed() for i in range(a.ndim)]),
            tuple([idx[i].max(axis).compressed() for i in range(a.ndim)]),]

def flatnotmasked_contiguous(a):
    """Find contiguous unmasked data in a flattened masked array.

    Return a sorted sequence of slices (start index, end index).

    """
    m = getmask(a)
    if m is nomask:
        return (a.size, [0,-1])
    unmasked = numeric.flatnonzero(~m)
    if len(unmasked) == 0:
        return None
    result = []
    for k, group in groupby(enumerate(unmasked), lambda (i,x):i-x):
        tmp = numpy.fromiter((g[1] for g in group), int_)
#        result.append((tmp.size, tuple(tmp[[0,-1]])))
        result.append( slice(tmp[0],tmp[-1]) )
    result.sort()
    return result

def notmasked_contiguous(a, axis=None):
    """Find contiguous unmasked data in a masked array along the given
    axis.

    Parameters
    ----------
        axis : int, optional
            Axis along which to perform the operation.
            If None, applies to a flattened version of the array.
            
    Returns
    -------
        a sorted sequence of slices (start index, end index).

    Notes
    -----
        Only accepts 2D arrays at most.

    """
    a = asarray(a)
    nd = a.ndim
    if nd > 2:
        raise NotImplementedError,"Currently limited to atmost 2D array."
    if axis is None or nd == 1:
        return flatnotmasked_contiguous(a)
    #
    result = []
    #
    other = (axis+1)%2
    idx = [0,0]
    idx[axis] = slice(None,None)
    #
    for i in range(a.shape[other]):
        idx[other] = i
        result.append( flatnotmasked_contiguous(a[idx]) )
    return result

################################################################################