core

* use self.__name__ for private method instances extras * fixed corrcoef * introduced diagflat
author: pierregm <pierregm@localhost> 2008-08-07 02:25:19 +0000
committer: pierregm <pierregm@localhost> 2008-08-07 02:25:19 +0000
commit: 469c4d3d5417f5693ad8e326d7069cdec7c92663 (patch)
tree: 8d0c4a709fc6a0bf696040bfb9d1147d9fd3d759
parent: ded1bf725d022e9e2ac5df3d6785a14e702f6fec (diff)
download: numpy-469c4d3d5417f5693ad8e326d7069cdec7c92663.tar.gz
3 files changed, 151 insertions, 61 deletions
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index bb1e192c6..1811788b9 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -63,7 +63,7 @@ import cPickle
 import operator
 
 import numpy as np
-from numpy import ndarray, dtype, typecodes, amax, amin, iscomplexobj,\
+from numpy import ndarray, typecodes, amax, amin, iscomplexobj,\
     bool_, complex_, float_, int_, object_, str_
 from numpy import array as narray
 
@@ -540,7 +540,7 @@ class _MaskedBinaryOperation:
             return masked
         return result
     #
-    def reduce (self, target, axis=0, dtype=None):
+    def reduce(self, target, axis=0, dtype=None):
         """Reduce `target` along the given `axis`."""
         if isinstance(target, MaskedArray):
             tclass = type(target)
@@ -1104,15 +1104,15 @@ class _arraymethod(object):
 
     """
     def __init__(self, funcname, onmask=True):
-        self._name = self.__name__ = funcname
+        self.__name__ = funcname
         self._onmask = onmask
         self.obj = None
         self.__doc__ = self.getdoc()
     #
     def getdoc(self):
         "Return the doc of the function (from the doc of the method)."
-        methdoc = getattr(ndarray, self._name, None)
-        methdoc = getattr(np, self._name, methdoc)
+        methdoc = getattr(ndarray, self.__name__, None)
+        methdoc = getattr(np, self.__name__, methdoc)
         if methdoc is not None:
             return methdoc.__doc__
     #
@@ -1121,7 +1121,7 @@ class _arraymethod(object):
         return self
     #
     def __call__(self, *args, **params):
-        methodname = self._name
+        methodname = self.__name__
         data = self.obj._data
         mask = self.obj._mask
         cls = type(self.obj)
@@ -3180,7 +3180,7 @@ masked_%(name)s(data = %(data)s,
         """
         (ver, shp, typ, isf, raw, msk, flv) = state
         ndarray.__setstate__(self, (shp, typ, isf, raw))
-        self._mask.__setstate__((shp, dtype(bool), isf, msk))
+        self._mask.__setstate__((shp, np.dtype(bool), isf, msk))
         self.fill_value = flv
     #
     def __reduce__(self):
@@ -3360,28 +3360,28 @@ class _frommethod:
 
     """
     def __init__(self, methodname):
-        self._methodname = methodname
+        self.__name__ = methodname
         self.__doc__ = self.getdoc()
     def getdoc(self):
         "Return the doc of the function (from the doc of the method)."
         try:
-            return getattr(MaskedArray, self._methodname).__doc__
+            return getattr(MaskedArray, self.__name__).__doc__
         except:
-            return getattr(np, self._methodname).__doc__
+            return getattr(np, self.__name__).__doc__
     def __call__(self, a, *args, **params):
         if isinstance(a, MaskedArray):
-            return getattr(a, self._methodname).__call__(*args, **params)
+            return getattr(a, self.__name__).__call__(*args, **params)
         #FIXME ----
         #As x is not a MaskedArray, we transform it to a ndarray with asarray
         #... and call the corresponding method.
         #Except that sometimes it doesn't work (try reshape([1,2,3,4],(2,2)))
         #we end up with a "SystemError: NULL result without error in PyObject_Call"
         #A dirty trick is then to call the initial numpy function...
-        method = getattr(narray(a, copy=False), self._methodname)
+        method = getattr(narray(a, copy=False), self.__name__)
         try:
             return method(*args, **params)
         except SystemError:
-            return getattr(np,self._methodname).__call__(a, *args, **params)
+            return getattr(np,self.__name__).__call__(a, *args, **params)
 
 all = _frommethod('all')
 anomalies = anom = _frommethod('anom')
diff --git a/numpy/ma/extras.py b/numpy/ma/extras.py
index fef4743fd..b84edf595 100644
--- a/numpy/ma/extras.py
+++ b/numpy/ma/extras.py
@@ -15,7 +15,7 @@ __all__ = ['apply_along_axis', 'atleast_1d', 'atleast_2d', 'atleast_3d',
            'average',
            'column_stack','compress_cols','compress_rowcols', 'compress_rows',
            'count_masked', 'corrcoef', 'cov',
-           'dot','dstack',
+           'diagflat', 'dot','dstack',
            'ediff1d','expand_dims',
            'flatnotmasked_contiguous','flatnotmasked_edges',
            'hsplit','hstack',
@@ -96,14 +96,14 @@ def masked_all_like(arr):
 class _fromnxfunction:
     """Defines a wrapper to adapt numpy functions to masked arrays."""
     def __init__(self, funcname):
-        self._function = funcname
+        self.__name__ = funcname
         self.__doc__ = self.getdoc()
     def getdoc(self):
         "Retrieves the __doc__ string from the function."
-        return getattr(np, self._function).__doc__ +\
+        return getattr(np, self.__name__).__doc__ +\
             "*Notes*:\n    (The function is applied to both the _data and the _mask, if any.)"
     def __call__(self, *args, **params):
-        func = getattr(np, self._function)
+        func = getattr(np, self.__name__)
         if len(args)==1:
             x = args[0]
             if isinstance(x, ndarray):
@@ -137,6 +137,8 @@ dstack = _fromnxfunction('dstack')
 
 hsplit = _fromnxfunction('hsplit')
 
+diagflat = _fromnxfunction('diagflat')
+
 def expand_dims(a, axis):
     """Expands the shape of a by including newaxis before axis.
     """
@@ -643,6 +645,48 @@ def ediff1d(array, to_end=None, to_begin=None):
     return masked_array(r_data, mask=r_mask)
 
 
+def _covhelper(x, y=None, rowvar=True, allow_masked=True):
+    """
+    Private function for the computation of covariance and correlation
+    coefficients.
+    
+    """
+    x = ma.array(x, ndmin=2, copy=True, dtype=float)
+    xmask = ma.getmaskarray(x)
+    # Quick exit if we can't process masked data
+    if not allow_masked and xmask.any():
+        raise ValueError("Cannot process masked data...")
+    #
+    if x.shape[0] == 1:
+        rowvar = True
+    # Make sure that rowvar is either 0 or 1
+    rowvar = int(bool(rowvar))
+    axis = 1-rowvar
+    if rowvar:
+        tup = (slice(None), None)
+    else:
+        tup = (None, slice(None))
+    #
+    if y is None:
+        xnotmask = np.logical_not(xmask).astype(int)
+    else:
+        y = array(y, copy=False, ndmin=2, dtype=float)
+        ymask = ma.getmaskarray(y)
+        if not allow_masked and ymask.any():
+            raise ValueError("Cannot process masked data...")
+        if xmask.any() or ymask.any():
+            if y.shape == x.shape:
+                # Define some common mask
+                common_mask = np.logical_or(xmask, ymask)
+                if common_mask is not nomask:
+                    x.unshare_mask()
+                    y.unshare_mask()
+                    xmask = x._mask = y._mask = ymask = common_mask
+        x = ma.concatenate((x,y),axis)
+        xnotmask = np.logical_not(np.concatenate((xmask, ymask), axis)).astype(int)
+    x -= x.mean(axis=rowvar)[tup]
+    return (x, xnotmask, rowvar)
+
 
 def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
     """Estimates the covariance matrix.
@@ -683,47 +727,12 @@ def cov(x, y=None, rowvar=True, bias=False, allow_masked=True):
         Raised if some values are missing and allow_masked is False.
 
     """
-    x = array(x, ndmin=2, copy=True, dtype=float)
-    xmask = getmaskarray(x)
-    # Quick exit if we can't process masked data
-    if not allow_masked and xmask.any():
-        raise ValueError("Cannot process masked data...")
-    #
-    if x.shape[0] == 1:
-        rowvar = True
-    # Make sure that rowvar is either 0 or 1
-    rowvar = int(bool(rowvar))
-    axis = 1-rowvar
-    if rowvar:
-        tup = (slice(None), None)
-    else:
-        tup = (None, slice(None))
-    #
-    if y is None:
-        xnotmask = np.logical_not(xmask).astype(int)
-    else:
-        y = array(y, copy=False, ndmin=2, dtype=float)
-        ymask = getmaskarray(y)
-        if not allow_masked and ymask.any():
-            raise ValueError("Cannot process masked data...")
-        if xmask.any() or ymask.any():
-            if y.shape == x.shape:
-                # Define some common mask
-                common_mask = np.logical_or(xmask, ymask)
-                if common_mask is not nomask:
-                    x.unshare_mask()
-                    y.unshare_mask()
-                    xmask = x._mask = y._mask = ymask = common_mask
-        x = concatenate((x,y),axis)
-        xnotmask = np.logical_not(np.concatenate((xmask, ymask), axis)).astype(int)
-
-    x -= x.mean(axis=rowvar)[tup]
-
+    (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
     if not rowvar:
-        fact = dot(xnotmask.T, xnotmask)*1. - (1 - bool(bias))
+        fact = np.dot(xnotmask.T, xnotmask)*1. - (1 - bool(bias))
         result = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
     else:
-        fact = dot(xnotmask, xnotmask.T)*1. - (1 - bool(bias))
+        fact = np.dot(xnotmask, xnotmask.T)*1. - (1 - bool(bias))
         result = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
     return result
 
@@ -761,14 +770,39 @@ def corrcoef(x, y=None, rowvar=True, bias=False, allow_masked=True):
     cov
 
     """
-
-    c = cov(x, y, rowvar, bias, allow_masked)
+    # Get the data
+    (x, xnotmask, rowvar) = _covhelper(x, y, rowvar, allow_masked)
+    # Compute the covariance matrix
+    if not rowvar:
+        fact = np.dot(xnotmask.T, xnotmask)*1. - (1 - bool(bias))
+        c = (dot(x.T, x.conj(), strict=False) / fact).squeeze()
+    else:
+        fact = np.dot(xnotmask, xnotmask.T)*1. - (1 - bool(bias))
+        c = (dot(x, x.T.conj(), strict=False) / fact).squeeze()
+    # Check whether we have a scalar
     try:
-        d = ma.diag(c)
-    except ValueError: # scalar covariance
+        diag = ma.diagonal(c)
+    except ValueError:
         return 1
-    return c/ma.sqrt(ma.multiply.outer(d,d))
-
+    #
+    if xnotmask.all():
+        _denom = ma.sqrt(ma.multiply.outer(diag, diag))
+    else:
+        _denom = diagflat(diag)
+        n = x.shape[1-rowvar]
+        if rowvar:
+            for i in range(n-1):
+                for j in range(i+1,n):
+                    _x = mask_cols(vstack((x[i], x[j]))).var(axis=1,
+                                                             ddof=1-bias)
+                    _denom[i,j] = _denom[j,i] = ma.sqrt(ma.multiply.reduce(_x))
+        else:
+            for i in range(n-1):
+                for j in range(i+1,n):
+                    _x = mask_cols(vstack((x[:,i], x[:,j]))).var(axis=1,
+                                                                 ddof=1-bias)
+                    _denom[i,j] = _denom[j,i] = ma.sqrt(ma.multiply.reduce(_x))
+    return c/_denom
 
 #####--------------------------------------------------------------------------
 #---- --- Concatenation helpers ---
diff --git a/numpy/ma/tests/test_extras.py b/numpy/ma/tests/test_extras.py
index 5ca1a81ac..ff061d8dd 100644
--- a/numpy/ma/tests/test_extras.py
+++ b/numpy/ma/tests/test_extras.py
@@ -419,6 +419,62 @@ class TestCov(TestCase):
                             np.cov(xf, rowvar=False, bias=True) * x.shape[0]/frac)
 
 
+class TestCorrcoef(TestCase):
+    #
+    def setUp(self):
+        self.data = array(np.random.rand(12))
+    #
+    def test_1d_wo_missing(self):
+        "Test cov on 1D variable w/o missing values"
+        x = self.data
+        assert_equal(np.corrcoef(x), corrcoef(x))
+        assert_equal(np.corrcoef(x, rowvar=False), corrcoef(x, rowvar=False))
+        assert_equal(np.corrcoef(x, rowvar=False, bias=True),
+                     corrcoef(x, rowvar=False, bias=True))
+    #
+    def test_2d_wo_missing(self):
+        "Test corrcoef on 1 2D variable w/o missing values"
+        x = self.data.reshape(3,4)
+        assert_equal(np.corrcoef(x), corrcoef(x))
+        assert_equal(np.corrcoef(x, rowvar=False), corrcoef(x, rowvar=False))
+        assert_equal(np.corrcoef(x, rowvar=False, bias=True),
+                     corrcoef(x, rowvar=False, bias=True))
+    #
+    def test_1d_w_missing(self):
+        "Test corrcoef 1 1D variable w/missing values"
+        x = self.data
+        x[-1] = masked
+        x -= x.mean()
+        nx = x.compressed()
+        assert_almost_equal(np.corrcoef(nx), corrcoef(x))
+        assert_almost_equal(np.corrcoef(nx, rowvar=False), corrcoef(x, rowvar=False))
+        assert_almost_equal(np.corrcoef(nx, rowvar=False, bias=True),
+                            corrcoef(x, rowvar=False, bias=True))
+        #
+        try:
+            corrcoef(x, allow_masked=False)
+        except ValueError:
+            pass
+        #
+        # 2 1D variables w/ missing values
+        nx = x[1:-1]
+        assert_almost_equal(np.corrcoef(nx, nx[::-1]), corrcoef(x, x[::-1]))
+        assert_equal(np.corrcoef(nx, nx[::-1], rowvar=False), 
+                     corrcoef(x, x[::-1], rowvar=False))
+        assert_equal(np.corrcoef(nx, nx[::-1], rowvar=False, bias=True),
+                     corrcoef(x, x[::-1], rowvar=False, bias=True))
+    #
+    def test_2d_w_missing(self):
+        "Test corrcoef on 2D variable w/ missing value"
+        x = self.data
+        x[-1] = masked
+        x = x.reshape(3,4)
+        
+        test = corrcoef(x)
+        control = np.corrcoef(x)
+        assert_almost_equal(test[:-1,:-1], control[:-1,:-1])
+
+
 
 class TestPolynomial(TestCase):
     #
author	pierregm <pierregm@localhost>	2008-08-07 02:25:19 +0000
committer	pierregm <pierregm@localhost>	2008-08-07 02:25:19 +0000
commit	469c4d3d5417f5693ad8e326d7069cdec7c92663 (patch)
tree	8d0c4a709fc6a0bf696040bfb9d1147d9fd3d759
parent	ded1bf725d022e9e2ac5df3d6785a14e702f6fec (diff)
download	numpy-469c4d3d5417f5693ad8e326d7069cdec7c92663.tar.gz