diff options
author | dhuard <dhuard@localhost> | 2008-03-19 01:33:11 +0000 |
---|---|---|
committer | dhuard <dhuard@localhost> | 2008-03-19 01:33:11 +0000 |
commit | 4482e5fdbcbde307ac5a7bd3a3d014101129f4fd (patch) | |
tree | 75f553e71073e4b3e89461a83a7694ae6def0e27 /numpy/lib | |
parent | 38ad50efa0e871e8f0e17babe015242a19b13d58 (diff) | |
download | numpy-4482e5fdbcbde307ac5a7bd3a3d014101129f4fd.tar.gz |
Clean-up of the average function. Weights must now either have the same shape as a, or be 1D with length equal to the size of a along axis. A number of tests are added.
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/function_base.py | 96 | ||||
-rw-r--r-- | numpy/lib/tests/test_function_base.py | 58 |
2 files changed, 92 insertions, 62 deletions
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index 07a69e762..95cffa805 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -327,66 +327,50 @@ def histogramdd(sample, bins=10, range=None, normed=False, weights=None): def average(a, axis=None, weights=None, returned=False): - """Average the array over the given axis. + """Return the weighted average of array a over the given axis. + - Average over the specified axis using the given weights. The average is - taken over all array elements by default. The default values of the - weights is one. When the weights are given, then they must be - broadcastable to the shape of a when the average is taken over all - elements, otherwise they must fill a 1D array of the same length as the - axis. - Parameters ---------- a : array_like - Array containing data to be averaged. + Data to be averaged. axis : {None, integer}, optional - Axis to be averaged over. If axis is None, the the average is taken - over all elements in the array. + Axis along which to average a. If None, averaging is done over the + entire array irrespective of its shape. weights : {None, array_like}, optional - A weighted average is formed using the given weights. If weights=None - then all weights are taken to be one. If axis=None, the the shape of - the weights must be broadcastable to the shape of a, other wise - weights must be 1D and of the same length as the specified axis. + The importance each datum has in the computation of the + average. The weights array can either be 1D, in which case its length + must be the size of a along the given axis, or of the same shape as a. + If weights=None, all data are assumed to have weight equal to one. returned :{False, boolean}, optional - When true, then a tuple (average, sum_of_weights) is returned, - otherwise just the average. If the weights are all one the sum of the - weights will also be the number of elements averaged over. 
+ If True, the tuple (average, sum_of_weights) is returned, + otherwise only the average is returned. Note that if weights=None, then + the sum of the weights is also the number of elements averaged over. Returns ------- average, [sum_of_weights] : {array_type, double} - Returns the average along the specified axis by default. When returned - is True, the returns a tuple with the average as the first element and - the sum of the weights as the second element. The return type is - Float if a is of integer type, otherwise it is of the same type as a. - When returned, sum_of_weights is a scalar with the same type as the - average. + Return the average along the specified axis. When returned is True, + return a tuple with the average as the first element and the sum + of the weights as the second element. The return type is Float if a is + of integer type, otherwise it is of the same type as a. + sum_of_weights has the same type as the average. + + Example + ------- + >>> average(range(1,11), weights=range(10,0,-1)) + 4.0 + Exceptions ---------- ZeroDivisionError - Results when all weights are zero.if appropriate. The version in MA - does not, it returns masked values. + Raised when all weights along axis are zero. See numpy.ma.average for a + version robust to this type of error. TypeError - Results when both an axis and weights are specified and the weights are - not an 1D array. - - Notes - ----- - The default behavior is equivalent to - - a.mean(axis). - - If weights are given, and axis=None, then the result is equivalent to - - sum(a * weights) / (a.size/weights.size)*sum(weights)), - - In the case when the axis is not the default, then the result is equivalent - to weights broadcast over the specified axis, then - - sum(a * weights)/sum(weights) - + Raised when the length of 1D weights is not the same as the shape of a + along axis. 
+ """ if not isinstance(a, np.matrix) : a = np.asarray(a) @@ -397,19 +381,27 @@ def average(a, axis=None, weights=None, returned=False): else : a = a + 0.0 wgt = np.array(weights, dtype=a.dtype, copy=0) - scl = wgt.sum() - if axis is not None and wgt.ndim != 1 : - raise TypeError, 'Weights must be 1D when axis is specified' - if scl == 0.0: - raise ZeroDivisionError, "Weights sum to zero, can't be normalized" - if axis is None : - scl = scl*(a.size/wgt.size) - else: + # Sanity checks + if a.shape != wgt.shape : + if axis is None : + raise TypeError, "Axis must be specified when shapes of a and weights differ." + if wgt.ndim != 1 : + raise TypeError, "1D weights expected when shapes of a and weights differ." + if wgt.shape[0] != a.shape[axis] : + raise ValueError, "Length of weights not compatible with specified axis." + + # setup wgt to broadcast along axis wgt = np.array(wgt, copy=0, ndmin=a.ndim).swapaxes(-1,axis) + + scl = wgt.sum(axis=axis) + if (scl == 0.0).any(): + raise ZeroDivisionError, "Weights sum to zero, can't be normalized" + avg = np.multiply(a,wgt).sum(axis)/scl if returned: + scl = np.multiply(avg,0) + scl return avg, scl else: return avg diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 8db4d50a6..d1786969d 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -57,25 +57,63 @@ class TestAverage(NumpyTestCase): assert_almost_equal(y5.mean(0), average(y5, 0)) assert_almost_equal(y5.mean(1), average(y5, 1)) - def check_weighted(self): + y6 = matrix(rand(5,5)) + assert_array_equal(y6.mean(0), average(y6,0)) + + def check_weights(self): + y = arange(10) + w = arange(10) + assert_almost_equal(average(y, weights=w), (arange(10)**2).sum()*1./arange(10).sum()) + y1 = array([[1,2,3],[4,5,6]]) - actual = average(y1,weights=[1,2],axis=0) + w0 = [1,2] + actual = average(y1,weights=w0,axis=0) desired = array([3.,4.,5.]) assert_almost_equal(actual, desired) - def 
check_shape(self): - y = array([[1,2,3],[4,5,6]]) - - # this is not a valid test as documented in average. Should it be? - #w2 = [[0,0,1],[0,0,1]] - #desired = array([3., 6.]) - #assert_array_equal(average(y, weights=w2, axis=1), desired) w1 = [0,0,1] desired = array([3., 6.]) - assert_almost_equal(average(y, weights=w1, axis=1), desired) + assert_almost_equal(average(y1, weights=w1, axis=1), desired) + + # This should raise an error. Can we test for that ? + # assert_equal(average(y1, weights=w1), 9./2.) + + + # 2D Case + w2 = [[0,0,1],[0,0,2]] + desired = array([3., 6.]) + assert_array_equal(average(y1, weights=w2, axis=1), desired) + + assert_equal(average(y1, weights=w2), 5.) + + def check_returned(self): + y = array([[1,2,3],[4,5,6]]) + # No weights + avg, scl = average(y, returned=True) + assert_equal(scl, 6.) + + avg, scl = average(y, 0, returned=True) + assert_array_equal(scl, array([2.,2.,2.])) + + avg, scl = average(y, 1, returned=True) + assert_array_equal(scl, array([3.,3.])) + + # With weights + w0 = [1,2] + avg, scl = average(y, weights=w0, axis=0, returned=True) + assert_array_equal(scl, array([3., 3., 3.])) + + w1 = [1,2,3] + avg, scl = average(y, weights=w1, axis=1, returned=True) + assert_array_equal(scl, array([6., 6.])) + + w2 = [[0,0,1],[1,2,3]] + avg, scl = average(y, weights=w2, axis=1, returned=True) + assert_array_equal(scl, array([1.,6.])) + class TestSelect(NumpyTestCase): def _select(self,cond,values,default=0): |