diff options
author | pierregm <pierregm@localhost> | 2008-02-15 19:39:10 +0000 |
---|---|---|
committer | pierregm <pierregm@localhost> | 2008-02-15 19:39:10 +0000 |
commit | e6f97dc2f943d4196716376e5d73de7ebb4c9b21 (patch) | |
tree | da2be035d2ad68625f8505b9278bedcde6f0d528 | |
parent | 5cc1b4eda40d6310096757518f9abd6ca298d1cd (diff) | |
parent | 4e1fbfdcbfa5add735451ef36d62a7cec2850d45 (diff) | |
download | numpy-e6f97dc2f943d4196716376e5d73de7ebb4c9b21.tar.gz |
updated API_CHANGES.txt
mstats : updated docstrings
morestats : updated docstrings
core : fixed __setitem__ for records
-rw-r--r-- | numpy/ma/API_CHANGES.txt | 70 | ||||
-rw-r--r-- | numpy/ma/core.py | 14 | ||||
-rw-r--r-- | numpy/ma/morestats.py | 122 | ||||
-rw-r--r-- | numpy/ma/mstats.py | 209 | ||||
-rw-r--r-- | numpy/ma/tests/test_core.py | 11 |
5 files changed, 292 insertions, 134 deletions
diff --git a/numpy/ma/API_CHANGES.txt b/numpy/ma/API_CHANGES.txt index 551aba33c..8f1766fcb 100644 --- a/numpy/ma/API_CHANGES.txt +++ b/numpy/ma/API_CHANGES.txt @@ -4,6 +4,52 @@ API changes in the new masked array implementation ================================================== +Masked arrays are subclasses of ndarray +--------------------------------------- + +Contrary to the original implementation, masked arrays are now regular ndarrays:: + + >>> x = masked_array([1,2,3],mask=[0,0,1]) + >>> print isinstance(x, numpy.ndarray) + True + + +``_data`` returns a view of the masked array +-------------------------------------------- + +Masked arrays are composed of a ``_data`` part and a ``_mask``. Accessing the +``_data`` part will return a regular ndarray or any of its subclasses, depending +on the initial data:: + + >>> x = masked_array(numpy.matrix([[1,2],[3,4]]),mask=[[0,0],[0,1]]) + >>> print x._data + [[1 2] + [3 4]] + >>> print type(x._data) + <class 'numpy.core.defmatrix.matrix'> + + +In practice, ``_data`` is implemented as a property, not as an attribute. +Therefore, you cannot access it directly, and some simple tests such as the +following one will fail:: + + >>> x._data is x._data + False + + +``filled(x)`` can return a subclass of ndarray +---------------------------------------------- +The function ``filled(a)`` returns an array of the same type as ``a._data``:: + + >>> x = masked_array(numpy.matrix([[1,2],[3,4]]),mask=[[0,0],[0,1]]) + >>> y = filled(x) + >>> print type(y) + <class 'numpy.core.defmatrix.matrix'> + >>> print y + matrix([[ 1, 2], + [ 3, 999999]]) + + ``put``, ``putmask`` behave like their ndarray counterparts ----------------------------------------------------------- @@ -66,3 +112,27 @@ converted to booleans: File "<stdin>", line 1, in <module> ValueError: The truth value of an array with more than one element is ambiguous. 
Use a.any() or a.all() + +================================== +New features (non-exhaustive list) +================================== + +``mr_`` +------- + +``mr_`` mimics the behavior of ``r_`` for masked arrays. + +``anom`` +-------- + +The ``anom`` method returns the deviations from the average (anomalies). + +``varu`` and ``stdu`` +--------------------- + +These methods return unbiased estimates of the variance and standard deviation +respectively. An unbiased estimate is obtained by dividing the sum of the +squared anomalies by ``n-1`` instead of ``n`` for the biased estimates, where +``n`` is the number of unmasked elements along the given axis. + + diff --git a/numpy/ma/core.py b/numpy/ma/core.py index ada1a554a..21d6b312f 100644 --- a/numpy/ma/core.py +++ b/numpy/ma/core.py @@ -1358,7 +1358,7 @@ class MaskedArray(numeric.ndarray): self._sharedmask = False return #.... - dval = getdata(value).astype(self.dtype) + dval = narray(value, copy=False, dtype=self.dtype) valmask = getmask(value) if self._mask is nomask: if valmask is not nomask: @@ -3305,3 +3305,15 @@ def loads(strg): return cPickle.loads(strg) ################################################################################ + +if 1: + from testutils import assert_equal + if 1: + mtype = [('f',float_),('s','|S3')] + x = array([(1,'a'),(2,'b'),(numpy.pi,'pi')], dtype=mtype) + x[0] = (10,'A') + (xf, xs) = (x['f'], x['s']) + assert_equal(xf.data, [10,2,numpy.pi]) + assert_equal(xf.dtype, float_) + assert_equal(xs.data, ['A', 'b', 'pi']) + assert_equal(xs.dtype, '|S3') diff --git a/numpy/ma/morestats.py b/numpy/ma/morestats.py index e7085c240..b9e77a3c9 100644 --- a/numpy/ma/morestats.py +++ b/numpy/ma/morestats.py @@ -40,22 +40,26 @@ def hdquantiles(data, prob=list([.25,.5,.75]), axis=None, var=False,): """Computes quantile estimates with the Harrell-Davis method, where the estimates are calculated as a weighted linear combination of order statistics. 
-*Parameters* : - data: {ndarray} +Parameters +---------- + data: ndarray Data array. - prob: {sequence} + prob: sequence Sequence of quantiles to compute. - axis : {integer} + axis : int Axis along which to compute the quantiles. If None, use a flattened array. - var : {boolean} + var : boolean Whether to return the variance of the estimate. -*Returns* +Returns +------- A (p,) array of quantiles (if ``var`` is False), or a (2,p) array of quantiles and variances (if ``var`` is True), where ``p`` is the number of quantiles. -:Note: +Notes +----- The function is restricted to 2D arrays. + """ def _hd_1D(data,prob,var): "Computes the HD quantiles for a 1D array. Returns nan for invalid data." @@ -102,13 +106,15 @@ are calculated as a weighted linear combination of order statistics. def hdmedian(data, axis=-1, var=False): """Returns the Harrell-Davis estimate of the median along the given axis. -*Parameters* : - data: {ndarray} +Parameters +---------- + data: ndarray Data array. - axis : {integer} + axis : int Axis along which to compute the quantiles. If None, use a flattened array. - var : {boolean} + var : boolean Whether to return the variance of the estimate. + """ result = hdquantiles(data,[0.5], axis=axis, var=var) return result.squeeze() @@ -119,16 +125,19 @@ def hdquantiles_sd(data, prob=list([.25,.5,.75]), axis=None): """Computes the standard error of the Harrell-Davis quantile estimates by jackknife. -*Parameters* : - data: {ndarray} +Parameters +---------- + data: ndarray Data array. - prob: {sequence} + prob: sequence Sequence of quantiles to compute. - axis : {integer} + axis : int Axis along which to compute the quantiles. If None, use a flattened array. -*Note*: +Notes +----- The function is restricted to 2D arrays. + """ def _hdsd_1D(data,prob): "Computes the std error for 1D arrays." 
@@ -172,16 +181,18 @@ def trimmed_mean_ci(data, proportiontocut=0.2, alpha=0.05, axis=None): """Returns the selected confidence interval of the trimmed mean along the given axis. -*Parameters* : - data : {sequence} +Parameters +---------- + data : sequence Input data. The data is transformed to a masked array - proportiontocut : {float} + proportiontocut : float Proportion of the data to cut from each side of the data . As a result, (2*proportiontocut*n) values are actually trimmed. - alpha : {float} + alpha : float Confidence level of the intervals. - axis : {integer} + axis : int Axis along which to cut. If None, uses a flattened version of the input. + """ data = masked_array(data, copy=False) trimmed = trim_both(data, proportiontocut=proportiontocut, axis=axis) @@ -196,13 +207,15 @@ def mjci(data, prob=[0.25,0.5,0.75], axis=None): """Returns the Maritz-Jarrett estimators of the standard error of selected experimental quantiles of the data. -*Parameters* : - data: {ndarray} +Parameters +----------- + data: ndarray Data array. - prob: {sequence} + prob: sequence Sequence of quantiles to compute. - axis : {integer} + axis : int Axis along which to compute the quantiles. If None, use a flattened array. + """ def _mjci_1D(data, p): data = data.compressed() @@ -236,14 +249,15 @@ def mquantiles_cimj(data, prob=[0.25,0.50,0.75], alpha=0.05, axis=None): """Computes the alpha confidence interval for the selected quantiles of the data, with Maritz-Jarrett estimators. -*Parameters* : - data: {ndarray} +Parameters +---------- + data: ndarray Data array. - prob: {sequence} + prob: sequence Sequence of quantiles to compute. - alpha : {float} + alpha : float Confidence level of the intervals. - axis : {integer} + axis : integer Axis along which to compute the quantiles. If None, use a flattened array. 
""" alpha = min(alpha, 1-alpha) @@ -258,13 +272,14 @@ def median_cihs(data, alpha=0.05, axis=None): """Computes the alpha-level confidence interval for the median of the data, following the Hettmasperger-Sheather method. -*Parameters* : - data : {sequence} +Parameters +---------- + data : sequence Input data. Masked values are discarded. The input should be 1D only, or axis should be set to None. - alpha : {float} + alpha : float Confidence level of the intervals. - axis : {integer} + axis : integer Axis along which to compute the quantiles. If None, use a flattened array. """ def _cihs_1D(data, alpha): @@ -299,7 +314,8 @@ def compare_medians_ms(group_1, group_2, axis=None): The comparison is performed using the McKean-Schrader estimate of the standard error of the medians. -*Parameters* : +Parameters +---------- group_1 : {sequence} First dataset. group_2 : {sequence} @@ -307,7 +323,8 @@ error of the medians. axis : {integer} Axis along which the medians are estimated. If None, the arrays are flattened. -*Returns* : +Returns +------- A (p,) array of comparison values. """ @@ -325,22 +342,23 @@ error of the medians. #.............................................................................. def rank_data(data, axis=None, use_missing=False): """Returns the rank (also known as order statistics) of each data point -along the given axis. - -If some values are tied, their rank is averaged. -If some values are masked, their rank is set to 0 if use_missing is False, or -set to the average rank of the unmasked values if use_missing is True. - -*Parameters* : - data : {sequence} - Input data. The data is transformed to a masked array - axis : {integer} - Axis along which to perform the ranking. If None, the array is first - flattened. An exception is raised if the axis is specified for arrays - with a dimension larger than 2 - use_missing : {boolean} - Whether the masked values have a rank of 0 (False) or equal to the - average rank of the unmasked values (True). 
+ along the given axis. + + If some values are tied, their rank is averaged. + If some values are masked, their rank is set to 0 if use_missing is False, + or set to the average rank of the unmasked values if use_missing is True. + + Parameters + ---------- + data : sequence + Input data. The data is transformed to a masked array + axis : integer + Axis along which to perform the ranking. + If None, the array is first flattened. An exception is raised if + the axis is specified for arrays with a dimension larger than 2 + use_missing : boolean + Whether the masked values have a rank of 0 (False) or equal to the + average rank of the unmasked values (True). """ # def _rank1d(data, use_missing=False): diff --git a/numpy/ma/mstats.py b/numpy/ma/mstats.py index 8daa49c4b..cd2c93c78 100644 --- a/numpy/ma/mstats.py +++ b/numpy/ma/mstats.py @@ -33,16 +33,20 @@ __all__ = ['cov','meppf','plotting_positions','meppf','mmedian','mquantiles', def winsorize(data, alpha=0.2): """Returns a Winsorized version of the input array. + + The (alpha/2.) lowest values are set to the (alpha/2.)th percentile, + and the (alpha/2.) highest values are set to the (1-alpha/2.)th + percentile. + Masked values are skipped. + + Parameters + ---------- + data : ndarray + Input data to Winsorize. The data is first flattened. + alpha : float + Percentage of total Winsorization: alpha/2. on the left, + alpha/2. on the right -The (alpha/2.) lowest values are set to the (alpha/2.)th percentile, and -the (alpha/2.) highest values are set to the (1-alpha/2.)th percentile -Masked values are skipped. - -*Parameters*: - data : {ndarray} - Input data to Winsorize. The data is first flattened. - alpha : {float}, optional - Percentage of total Winsorization : alpha/2. on the left, alpha/2. on the right """ data = masked_array(data, copy=False).ravel() idxsort = data.argsort() @@ -53,18 +57,26 @@ Masked values are skipped. #.............................................................................. 
def trim_both(data, proportiontocut=0.2, axis=None): - """Trims the data by masking the int(trim*n) smallest and int(trim*n) largest -values of data along the given axis, where n is the number of unmasked values. - -*Parameters*: - data : {ndarray} - Data to trim. - proportiontocut : {float} - Percentage of trimming. If n is the number of unmasked values before trimming, - the number of values after trimming is (1-2*trim)*n. - axis : {integer} - Axis along which to perform the trimming. If None, the input array is first - flattened. + """Trims the data by masking the int(trim*n) smallest and int(trim*n) + largest values of data along the given axis, where n is the number + of unmasked values. + + Parameters + ---------- + data : ndarray + Data to trim. + proportiontocut : float + Percentage of trimming. If n is the number of unmasked values + before trimming, the number of values after trimming is: + (1-2*trim)*n. + axis : int + Axis along which to perform the trimming. + If None, the input array is first flattened. + + Notes + ----- + The function works only for arrays up to 2D. + """ #................... def _trim_1D(data, trim): @@ -87,22 +99,30 @@ values of data along the given axis, where n is the number of unmasked values. #.............................................................................. def trim_tail(data, proportiontocut=0.2, tail='left', axis=None): - """Trims the data by masking int(trim*n) values from ONE tail of the data -along the given axis, where n is the number of unmasked values. - -*Parameters*: - data : {ndarray} - Data to trim. - proportiontocut : {float} - Percentage of trimming. If n is the number of unmasked values before trimming, - the number of values after trimming is (1-trim)*n. - tail : {string} - Trimming direction, in ('left', 'right'). If left, the proportiontocut - lowest values are set to the corresponding percentile. If right, the - proportiontocut highest values are used instead. 
- axis : {integer} - Axis along which to perform the trimming. If None, the input array is first - flattened. + """Trims the data by masking int(trim*n) values from ONE tail of the + data along the given axis, where n is the number of unmasked values. + + Parameters + ---------- + data : ndarray + Data to trim. + proportiontocut : float + Percentage of trimming. If n is the number of unmasked values + before trimming, the number of values after trimming is + (1-trim)*n. + tail : string + Trimming direction, in ('left', 'right'). + If left, the ``proportiontocut`` lowest values are set to the + corresponding percentile. If right, the ``proportiontocut`` + highest values are used instead. + axis : int + Axis along which to perform the trimming. + If None, the input array is first flattened. + + Notes + ----- + The function works only for arrays up to 2D. + """ #................... def _trim_1D(data, trim, left): @@ -138,35 +158,43 @@ along the given axis, where n is the number of unmasked values. #.............................................................................. def trimmed_mean(data, proportiontocut=0.2, axis=None): - """Returns the trimmed mean of the data along the given axis. Trimming is -performed on both ends of the distribution. - -*Parameters*: - data : {ndarray} - Data to trim. - proportiontocut : {float} - Proportion of the data to cut from each side of the data . - As a result, (2*proportiontocut*n) values are actually trimmed. - axis : {integer} - Axis along which to perform the trimming. If None, the input array is first - flattened. + """Returns the trimmed mean of the data along the given axis. + Trimming is performed on both ends of the distribution. + + Parameters + ---------- + data : ndarray + Data to trim. + proportiontocut : float + Proportion of the data to cut from each side of the data . + As a result, (2*proportiontocut*n) values are actually trimmed. + axis : int + Axis along which to perform the trimming. 
+ If None, the input array is first flattened. + """ return trim_both(data, proportiontocut=proportiontocut, axis=axis).mean(axis=axis) #.............................................................................. def trimmed_stde(data, proportiontocut=0.2, axis=None): """Returns the standard error of the trimmed mean for the input data, -along the given axis. Trimming is performed on both ends of the distribution. - -*Parameters*: - data : {ndarray} - Data to trim. - proportiontocut : {float} - Proportion of the data to cut from each side of the data . - As a result, (2*proportiontocut*n) values are actually trimmed. - axis : {integer} - Axis along which to perform the trimming. If None, the input array is first - flattened. + along the given axis. Trimming is performed on both ends of the distribution. + + Parameters + ---------- + data : ndarray + Data to trim. + proportiontocut : float + Proportion of the data to cut from each side of the data. + As a result, (2*proportiontocut*n) values are actually trimmed. + axis : int + Axis along which to perform the trimming. + If None, the input array is first flattened. + + Notes + ----- + The function works with arrays up to 2D. + """ #........................ def _trimmed_stde_1D(data, trim=0.2): @@ -189,13 +217,14 @@ def stde_median(data, axis=None): """Returns the McKean-Schrader estimate of the standard error of the sample median along the given axis. + Parameters + ---------- + data : ndarray + Data to trim. + axis : int + Axis along which to perform the trimming. + If None, the input array is first flattened. -*Parameters*: - data : {ndarray} - Data to trim. - axis : {integer} - Axis along which to perform the trimming. If None, the input array is first - flattened. 
""" def _stdemed_1D(data): sorted = numpy.sort(data.compressed()) @@ -240,16 +269,17 @@ Typical values of (alpha,beta) are: - (.4,.4) : approximately quantile unbiased (Cunnane) - (.35,.35): APL, used with PWM -*Parameters*: - x : {sequence} +Parameters +---------- + x : sequence Input data, as a sequence or array of dimension at most 2. - prob : {sequence} + prob : sequence List of quantiles to compute. - alpha : {float} + alpha : float Plotting positions parameter. - beta : {float} + beta : float Plotting positions parameter. - axis : {integer} + axis : int Axis along which to perform the trimming. If None, the input array is first flattened. """ @@ -299,6 +329,18 @@ def plotting_positions(data, alpha=0.4, beta=0.4): if x is normally distributed (R type 9) - (.4,.4) : approximately quantile unbiased (Cunnane) - (.35,.35): APL, used with PWM + +Parameters +---------- + x : sequence + Input data, as a sequence or array of dimension at most 2. + prob : sequence + List of quantiles to compute. + alpha : float + Plotting positions parameter. + beta : float + Plotting positions parameter. + """ data = masked_array(data, copy=False).reshape(1,-1) n = data.count() @@ -311,7 +353,11 @@ meppf = plotting_positions def mmedian(data, axis=None): - """Returns the median of data along the given axis. Missing data are discarded.""" + """Returns the median of data along the given axis. + + Missing data are discarded. + + """ def _median1D(data): x = numpy.sort(data.compressed()) if x.size == 0: @@ -331,17 +377,18 @@ def cov(x, y=None, rowvar=True, bias=False, strict=False): Normalization is by (N-1) where N is the number of observations (unbiased estimate). If bias is True then normalization is by N. -*Parameters*: - x : {ndarray} +Parameters +---------- + x : ndarray Input data. If x is a 1D array, returns the variance. If x is a 2D array, returns the covariance matrix. - y : {ndarray}, optional + y : ndarray Optional set of variables. 
- rowvar : {boolean} + rowvar : boolean If rowvar is true, then each row is a variable with obersvations in columns. If rowvar is False, each column is a variable and the observations are in the rows. - bias : {boolean} + bias : boolean Whether to use a biased or unbiased estimate of the covariance. If bias is True, then the normalization is by N, the number of observations. Otherwise, the normalization is by (N-1) @@ -400,10 +447,10 @@ def rsh(data, points=None): """Evalutates Rosenblatt's shifted histogram estimators for each point on the dataset 'data'. -*Parameters* : - data : {sequence} +Parameters + data : sequence Input data. Masked values are ignored. - points : {sequence} + points : sequence Sequence of points where to evaluate Rosenblatt shifted histogram. If None, use the data. """ diff --git a/numpy/ma/tests/test_core.py b/numpy/ma/tests/test_core.py index 347372b9b..1d30687c6 100644 --- a/numpy/ma/tests/test_core.py +++ b/numpy/ma/tests/test_core.py @@ -838,6 +838,17 @@ class TestMA(NumpyTestCase): assert_equal(xs.mask, [0,1,0]) assert_equal(xs.dtype, '|S3') # + def test_set_records(self): + "Check setting an element of a record)" + mtype = [('f',float_),('s','|S3')] + x = array([(1,'a'),(2,'b'),(numpy.pi,'pi')], dtype=mtype) + x[0] = (10,'A') + (xf, xs) = (x['f'], x['s']) + assert_equal(xf.data, [10,2,numpy.pi]) + assert_equal(xf.dtype, float_) + assert_equal(xs.data, ['A', 'b', 'pi']) + assert_equal(xs.dtype, '|S3') + #............................................................................... |