summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorpierregm <pierregm@localhost>2008-02-15 19:39:10 +0000
committerpierregm <pierregm@localhost>2008-02-15 19:39:10 +0000
commite6f97dc2f943d4196716376e5d73de7ebb4c9b21 (patch)
treeda2be035d2ad68625f8505b9278bedcde6f0d528
parent5cc1b4eda40d6310096757518f9abd6ca298d1cd (diff)
parent4e1fbfdcbfa5add735451ef36d62a7cec2850d45 (diff)
downloadnumpy-e6f97dc2f943d4196716376e5d73de7ebb4c9b21.tar.gz
updated API_CHANGES.txt
mstats : updated docstrings morestats : updated docstrings core : fixed __setitem__ for records
-rw-r--r--numpy/ma/API_CHANGES.txt70
-rw-r--r--numpy/ma/core.py14
-rw-r--r--numpy/ma/morestats.py122
-rw-r--r--numpy/ma/mstats.py209
-rw-r--r--numpy/ma/tests/test_core.py11
5 files changed, 292 insertions, 134 deletions
diff --git a/numpy/ma/API_CHANGES.txt b/numpy/ma/API_CHANGES.txt
index 551aba33c..8f1766fcb 100644
--- a/numpy/ma/API_CHANGES.txt
+++ b/numpy/ma/API_CHANGES.txt
@@ -4,6 +4,52 @@
API changes in the new masked array implementation
==================================================
+Masked arrays are subclasses of ndarray
+---------------------------------------
+
+Contrary to the original implementation, masked arrays are now regular ndarrays::
+
+ >>> x = masked_array([1,2,3],mask=[0,0,1])
+ >>> print isinstance(x, numpy.ndarray)
+ True
+
+
+``_data`` returns a view of the masked array
+--------------------------------------------
+
+Masked arrays are composed of a ``_data`` part and a ``_mask``. Accessing the
+``_data`` part will return a regular ndarray or any of its subclass, depending
+on the initial data::
+
+ >>> x = masked_array(numpy.matrix([[1,2],[3,4]]),mask=[[0,0],[0,1]])
+ >>> print x._data
+ [[1 2]
+ [3 4]]
+ >>> print type(x._data)
+ <class 'numpy.core.defmatrix.matrix'>
+
+
+In practice, ``_data`` is implemented as a property, not as an attribute.
+Therefore, you cannot access it directly, and some simple tests such as the
+following one will fail::
+
+ >>> x._data is x._data
+ False
+
+
+``filled(x)`` can return a subclass of ndarray
+-------------
+The function ``filled(a)`` returns an array of the same type as ``a._data``::
+
+ >>> x = masked_array(numpy.matrix([[1,2],[3,4]]),mask=[[0,0],[0,1]])
+ >>> y = filled(x)
+ >>> print type(y)
+ <class 'numpy.core.defmatrix.matrix'>
+ >>> print y
+ matrix([[ 1, 2],
+ [ 3, 999999]])
+
+
``put``, ``putmask`` behave like their ndarray counterparts
-----------------------------------------------------------
@@ -66,3 +112,27 @@ converted to booleans:
File "<stdin>", line 1, in <module>
ValueError: The truth value of an array with more than one element is ambiguous. Use a.any() or a.all()
+
+==================================
+New features (non exhaustive list)
+==================================
+
+``mr_``
+-------
+
+``mr_`` mimics the behavior of ``r_`` for masked arrays.
+
+``anom``
+--------
+
+The ``anom`` method returns the deviations from the average (anomalies).
+
+``varu`` and ``stdu``
+---------------------
+
+These methods return unbiased estimates of the variance and standard deviation
+respectively. An unbiased estimate is obtained by dividing the sum of the
+squared anomalies by ``n-1`` instead of ``n`` for the biased estimates, where
+``n`` is the number of unmasked elements along the given axis.
+
+
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index ada1a554a..21d6b312f 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -1358,7 +1358,7 @@ class MaskedArray(numeric.ndarray):
self._sharedmask = False
return
#....
- dval = getdata(value).astype(self.dtype)
+ dval = narray(value, copy=False, dtype=self.dtype)
valmask = getmask(value)
if self._mask is nomask:
if valmask is not nomask:
@@ -3305,3 +3305,15 @@ def loads(strg):
return cPickle.loads(strg)
################################################################################
+
+if 1:
+ from testutils import assert_equal
+ if 1:
+ mtype = [('f',float_),('s','|S3')]
+ x = array([(1,'a'),(2,'b'),(numpy.pi,'pi')], dtype=mtype)
+ x[0] = (10,'A')
+ (xf, xs) = (x['f'], x['s'])
+ assert_equal(xf.data, [10,2,numpy.pi])
+ assert_equal(xf.dtype, float_)
+ assert_equal(xs.data, ['A', 'b', 'pi'])
+ assert_equal(xs.dtype, '|S3')
diff --git a/numpy/ma/morestats.py b/numpy/ma/morestats.py
index e7085c240..b9e77a3c9 100644
--- a/numpy/ma/morestats.py
+++ b/numpy/ma/morestats.py
@@ -40,22 +40,26 @@ def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,):
"""Computes quantile estimates with the Harrell-Davis method, where the estimates
are calculated as a weighted linear combination of order statistics.
-*Parameters* :
- data: {ndarray}
+Parameters
+----------
+ data: ndarray
Data array.
- prob: {sequence}
+ prob: sequence
Sequence of quantiles to compute.
- axis : {integer}
+ axis : int
Axis along which to compute the quantiles. If None, use a flattened array.
- var : {boolean}
+ var : boolean
Whether to return the variance of the estimate.
-*Returns*
+Returns
+-------
A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of quantiles
and variances (if ``var`` is True), where ``p`` is the number of quantiles.
-:Note:
+Notes
+-----
The function is restricted to 2D arrays.
+
"""
def _hd_1D(data,prob,var):
"Computes the HD quantiles for a 1D array. Returns nan for invalid data."
@@ -102,13 +106,15 @@ are calculated as a weighted linear combination of order statistics.
def hdmedian(data, axis=-1, var=False):
"""Returns the Harrell-Davis estimate of the median along the given axis.
-*Parameters* :
- data: {ndarray}
+Parameters
+----------
+ data: ndarray
Data array.
- axis : {integer}
+ axis : int
Axis along which to compute the quantiles. If None, use a flattened array.
- var : {boolean}
+ var : boolean
Whether to return the variance of the estimate.
+
"""
result = hdquantiles(data,[0.5], axis=axis, var=var)
return result.squeeze()
@@ -119,16 +125,19 @@ def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None):
"""Computes the standard error of the Harrell-Davis quantile estimates by jackknife.
-*Parameters* :
- data: {ndarray}
+Parameters
+----------
+ data: ndarray
Data array.
- prob: {sequence}
+ prob: sequence
Sequence of quantiles to compute.
- axis : {integer}
+ axis : int
Axis along which to compute the quantiles. If None, use a flattened array.
-*Note*:
+Notes
+-----
The function is restricted to 2D arrays.
+
"""
def _hdsd_1D(data,prob):
"Computes the std error for 1D arrays."
@@ -172,16 +181,18 @@ def trimmed_mean_ci(data, proportiontocut=0.2, alpha=0.05, axis=None):
"""Returns the selected confidence interval of the trimmed mean along the
given axis.
-*Parameters* :
- data : {sequence}
+Parameters
+----------
+ data : sequence
Input data. The data is transformed to a masked array
- proportiontocut : {float}
+ proportiontocut : float
Proportion of the data to cut from each side of the data .
As a result, (2*proportiontocut*n) values are actually trimmed.
- alpha : {float}
+ alpha : float
Confidence level of the intervals.
- axis : {integer}
+ axis : int
Axis along which to cut. If None, uses a flattened version of the input.
+
"""
data = masked_array(data, copy=False)
trimmed = trim_both(data, proportiontocut=proportiontocut, axis=axis)
@@ -196,13 +207,15 @@ def mjci(data, prob=[0.25,0.5,0.75], axis=None):
"""Returns the Maritz-Jarrett estimators of the standard error of selected
experimental quantiles of the data.
-*Parameters* :
- data: {ndarray}
+Parameters
+-----------
+ data: ndarray
Data array.
- prob: {sequence}
+ prob: sequence
Sequence of quantiles to compute.
- axis : {integer}
+ axis : int
Axis along which to compute the quantiles. If None, use a flattened array.
+
"""
def _mjci_1D(data, p):
data = data.compressed()
@@ -236,14 +249,15 @@ def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None):
"""Computes the alpha confidence interval for the selected quantiles of the
data, with Maritz-Jarrett estimators.
-*Parameters* :
- data: {ndarray}
+Parameters
+----------
+ data: ndarray
Data array.
- prob: {sequence}
+ prob: sequence
Sequence of quantiles to compute.
- alpha : {float}
+ alpha : float
Confidence level of the intervals.
- axis : {integer}
+ axis : integer
Axis along which to compute the quantiles. If None, use a flattened array.
"""
alpha = min(alpha, 1-alpha)
@@ -258,13 +272,14 @@ def median_cihs(data, alpha=0.05, axis=None):
"""Computes the alpha-level confidence interval for the median of the data,
following the Hettmansperger-Sheather method.
-*Parameters* :
- data : {sequence}
+Parameters
+----------
+ data : sequence
Input data. Masked values are discarded. The input should be 1D only, or
axis should be set to None.
- alpha : {float}
+ alpha : float
Confidence level of the intervals.
- axis : {integer}
+ axis : integer
Axis along which to compute the quantiles. If None, use a flattened array.
"""
def _cihs_1D(data, alpha):
@@ -299,7 +314,8 @@ def compare_medians_ms(group_1, group_2, axis=None):
The comparison is performed using the McKean-Schrader estimate of the standard
error of the medians.
-*Parameters* :
+Parameters
+----------
group_1 : {sequence}
First dataset.
group_2 : {sequence}
@@ -307,7 +323,8 @@ error of the medians.
axis : {integer}
Axis along which the medians are estimated. If None, the arrays are flattened.
-*Returns* :
+Returns
+-------
A (p,) array of comparison values.
"""
@@ -325,22 +342,23 @@ error of the medians.
#..............................................................................
def rank_data(data, axis=None, use_missing=False):
"""Returns the rank (also known as order statistics) of each data point
-along the given axis.
-
-If some values are tied, their rank is averaged.
-If some values are masked, their rank is set to 0 if use_missing is False, or
-set to the average rank of the unmasked values if use_missing is True.
-
-*Parameters* :
- data : {sequence}
- Input data. The data is transformed to a masked array
- axis : {integer}
- Axis along which to perform the ranking. If None, the array is first
- flattened. An exception is raised if the axis is specified for arrays
- with a dimension larger than 2
- use_missing : {boolean}
- Whether the masked values have a rank of 0 (False) or equal to the
- average rank of the unmasked values (True).
+ along the given axis.
+
+ If some values are tied, their rank is averaged.
+ If some values are masked, their rank is set to 0 if use_missing is False,
+ or set to the average rank of the unmasked values if use_missing is True.
+
+ Parameters
+ ----------
+ data : sequence
+ Input data. The data is transformed to a masked array
+ axis : integer
+ Axis along which to perform the ranking.
+ If None, the array is first flattened. An exception is raised if
+ the axis is specified for arrays with a dimension larger than 2
+ use_missing : boolean
+ Whether the masked values have a rank of 0 (False) or equal to the
+ average rank of the unmasked values (True).
"""
#
def _rank1d(data, use_missing=False):
diff --git a/numpy/ma/mstats.py b/numpy/ma/mstats.py
index 8daa49c4b..cd2c93c78 100644
--- a/numpy/ma/mstats.py
+++ b/numpy/ma/mstats.py
@@ -33,16 +33,20 @@ __all__ = ['cov','meppf','plotting_positions','meppf','mmedian','mquantiles',
def winsorize(data, alpha=0.2):
"""Returns a Winsorized version of the input array.
+
+ The (alpha/2.) lowest values are set to the (alpha/2.)th percentile,
+ and the (alpha/2.) highest values are set to the (1-alpha/2.)th
+ percentile.
+ Masked values are skipped.
+
+ Parameters
+ ----------
+ data : ndarray
+ Input data to Winsorize. The data is first flattened.
+ alpha : float
+ Percentage of total Winsorization: alpha/2. on the left,
+ alpha/2. on the right
-The (alpha/2.) lowest values are set to the (alpha/2.)th percentile, and
-the (alpha/2.) highest values are set to the (1-alpha/2.)th percentile
-Masked values are skipped.
-
-*Parameters*:
- data : {ndarray}
- Input data to Winsorize. The data is first flattened.
- alpha : {float}, optional
- Percentage of total Winsorization : alpha/2. on the left, alpha/2. on the right
"""
data = masked_array(data, copy=False).ravel()
idxsort = data.argsort()
@@ -53,18 +57,26 @@ Masked values are skipped.
#..............................................................................
def trim_both(data, proportiontocut=0.2, axis=None):
- """Trims the data by masking the int(trim*n) smallest and int(trim*n) largest
-values of data along the given axis, where n is the number of unmasked values.
-
-*Parameters*:
- data : {ndarray}
- Data to trim.
- proportiontocut : {float}
- Percentage of trimming. If n is the number of unmasked values before trimming,
- the number of values after trimming is (1-2*trim)*n.
- axis : {integer}
- Axis along which to perform the trimming. If None, the input array is first
- flattened.
+ """Trims the data by masking the int(trim*n) smallest and int(trim*n)
+ largest values of data along the given axis, where n is the number
+ of unmasked values.
+
+ Parameters
+ ----------
+ data : ndarray
+ Data to trim.
+ proportiontocut : float
+ Percentage of trimming. If n is the number of unmasked values
+ before trimming, the number of values after trimming is:
+ (1-2*trim)*n.
+ axis : int
+ Axis along which to perform the trimming.
+ If None, the input array is first flattened.
+
+ Notes
+ -----
+ The function works only for arrays up to 2D.
+
"""
#...................
def _trim_1D(data, trim):
@@ -87,22 +99,30 @@ values of data along the given axis, where n is the number of unmasked values.
#..............................................................................
def trim_tail(data, proportiontocut=0.2, tail='left', axis=None):
- """Trims the data by masking int(trim*n) values from ONE tail of the data
-along the given axis, where n is the number of unmasked values.
-
-*Parameters*:
- data : {ndarray}
- Data to trim.
- proportiontocut : {float}
- Percentage of trimming. If n is the number of unmasked values before trimming,
- the number of values after trimming is (1-trim)*n.
- tail : {string}
- Trimming direction, in ('left', 'right'). If left, the proportiontocut
- lowest values are set to the corresponding percentile. If right, the
- proportiontocut highest values are used instead.
- axis : {integer}
- Axis along which to perform the trimming. If None, the input array is first
- flattened.
+ """Trims the data by masking int(trim*n) values from ONE tail of the
+ data along the given axis, where n is the number of unmasked values.
+
+ Parameters
+ ----------
+ data : ndarray
+ Data to trim.
+ proportiontocut : float
+ Percentage of trimming. If n is the number of unmasked values
+ before trimming, the number of values after trimming is
+ (1-trim)*n.
+ tail : string
+ Trimming direction, in ('left', 'right').
+ If left, the ``proportiontocut`` lowest values are set to the
+ corresponding percentile. If right, the ``proportiontocut``
+ highest values are used instead.
+ axis : int
+ Axis along which to perform the trimming.
+ If None, the input array is first flattened.
+
+ Notes
+ -----
+ The function works only for arrays up to 2D.
+
"""
#...................
def _trim_1D(data, trim, left):
@@ -138,35 +158,43 @@ along the given axis, where n is the number of unmasked values.
#..............................................................................
def trimmed_mean(data, proportiontocut=0.2, axis=None):
- """Returns the trimmed mean of the data along the given axis. Trimming is
-performed on both ends of the distribution.
-
-*Parameters*:
- data : {ndarray}
- Data to trim.
- proportiontocut : {float}
- Proportion of the data to cut from each side of the data .
- As a result, (2*proportiontocut*n) values are actually trimmed.
- axis : {integer}
- Axis along which to perform the trimming. If None, the input array is first
- flattened.
+ """Returns the trimmed mean of the data along the given axis.
+ Trimming is performed on both ends of the distribution.
+
+ Parameters
+ ----------
+ data : ndarray
+ Data to trim.
+ proportiontocut : float
+ Proportion of the data to cut from each side of the data .
+ As a result, (2*proportiontocut*n) values are actually trimmed.
+ axis : int
+ Axis along which to perform the trimming.
+ If None, the input array is first flattened.
+
"""
return trim_both(data, proportiontocut=proportiontocut, axis=axis).mean(axis=axis)
#..............................................................................
def trimmed_stde(data, proportiontocut=0.2, axis=None):
"""Returns the standard error of the trimmed mean for the input data,
-along the given axis. Trimming is performed on both ends of the distribution.
-
-*Parameters*:
- data : {ndarray}
- Data to trim.
- proportiontocut : {float}
- Proportion of the data to cut from each side of the data .
- As a result, (2*proportiontocut*n) values are actually trimmed.
- axis : {integer}
- Axis along which to perform the trimming. If None, the input array is first
- flattened.
+ along the given axis. Trimming is performed on both ends of the distribution.
+
+ Parameters
+ ----------
+ data : ndarray
+ Data to trim.
+ proportiontocut : float
+ Proportion of the data to cut from each side of the data .
+ As a result, (2*proportiontocut*n) values are actually trimmed.
+ axis : int
+ Axis along which to perform the trimming.
+ If None, the input array is first flattened.
+
+ Notes
+ -----
+ The function works with arrays up to 2D.
+
"""
#........................
def _trimmed_stde_1D(data, trim=0.2):
@@ -189,13 +217,14 @@ def stde_median(data, axis=None):
"""Returns the McKean-Schrader estimate of the standard error of the sample
median along the given axis.
+ Parameters
+ ----------
+ data : ndarray
+ Data to trim.
+ axis : int
+ Axis along which to perform the trimming.
+ If None, the input array is first flattened.
-*Parameters*:
- data : {ndarray}
- Data to trim.
- axis : {integer}
- Axis along which to perform the trimming. If None, the input array is first
- flattened.
"""
def _stdemed_1D(data):
sorted = numpy.sort(data.compressed())
@@ -240,16 +269,17 @@ Typical values of (alpha,beta) are:
- (.4,.4) : approximately quantile unbiased (Cunnane)
- (.35,.35): APL, used with PWM
-*Parameters*:
- x : {sequence}
+Parameters
+----------
+ x : sequence
Input data, as a sequence or array of dimension at most 2.
- prob : {sequence}
+ prob : sequence
List of quantiles to compute.
- alpha : {float}
+ alpha : float
Plotting positions parameter.
- beta : {float}
+ beta : float
Plotting positions parameter.
- axis : {integer}
+ axis : int
Axis along which to perform the trimming. If None, the input array is first
flattened.
"""
@@ -299,6 +329,18 @@ def plotting_positions(data, alpha=0.4, beta=0.4):
if x is normally distributed (R type 9)
- (.4,.4) : approximately quantile unbiased (Cunnane)
- (.35,.35): APL, used with PWM
+
+Parameters
+----------
+ data : sequence
+ Input data, as a sequence or array of dimension at most 2.
+ prob : sequence
+ List of quantiles to compute.
+ alpha : float
+ Plotting positions parameter.
+ beta : float
+ Plotting positions parameter.
+
"""
data = masked_array(data, copy=False).reshape(1,-1)
n = data.count()
@@ -311,7 +353,11 @@ meppf = plotting_positions
def mmedian(data, axis=None):
- """Returns the median of data along the given axis. Missing data are discarded."""
+ """Returns the median of data along the given axis.
+
+ Missing data are discarded.
+
+ """
def _median1D(data):
x = numpy.sort(data.compressed())
if x.size == 0:
@@ -331,17 +377,18 @@ def cov(x, y=None, rowvar=True, bias=False, strict=False):
Normalization is by (N-1) where N is the number of observations (unbiased
estimate). If bias is True then normalization is by N.
-*Parameters*:
- x : {ndarray}
+Parameters
+----------
+ x : ndarray
Input data. If x is a 1D array, returns the variance. If x is a 2D array,
returns the covariance matrix.
- y : {ndarray}, optional
+ y : ndarray
Optional set of variables.
- rowvar : {boolean}
+ rowvar : boolean
If rowvar is true, then each row is a variable with observations in columns.
If rowvar is False, each column is a variable and the observations are in
the rows.
- bias : {boolean}
+ bias : boolean
Whether to use a biased or unbiased estimate of the covariance.
If bias is True, then the normalization is by N, the number of observations.
Otherwise, the normalization is by (N-1)
@@ -400,10 +447,10 @@ def rsh(data, points=None):
"""Evaluates Rosenblatt's shifted histogram estimators for each point
on the dataset 'data'.
-*Parameters* :
- data : {sequence}
+Parameters
+ data : sequence
Input data. Masked values are ignored.
- points : {sequence}
+ points : sequence
Sequence of points where to evaluate Rosenblatt shifted histogram.
If None, use the data.
"""
diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py
index 347372b9b..1d30687c6 100644
--- a/numpy/ma/tests/test_core.py
+++ b/numpy/ma/tests/test_core.py
@@ -838,6 +838,17 @@ class TestMA(NumpyTestCase):
assert_equal(xs.mask, [0,1,0])
assert_equal(xs.dtype, '|S3')
#
+ def test_set_records(self):
+ "Check setting an element of a record."
+ mtype = [('f',float_),('s','|S3')]
+ x = array([(1,'a'),(2,'b'),(numpy.pi,'pi')], dtype=mtype)
+ x[0] = (10,'A')
+ (xf, xs) = (x['f'], x['s'])
+ assert_equal(xf.data, [10,2,numpy.pi])
+ assert_equal(xf.dtype, float_)
+ assert_equal(xs.data, ['A', 'b', 'pi'])
+ assert_equal(xs.dtype, '|S3')
+
#...............................................................................