Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/function_base.py            | 146
-rw-r--r-- | numpy/lib/index_tricks.py             |   2
-rw-r--r-- | numpy/lib/npyio.py                    |  39
-rw-r--r-- | numpy/lib/shape_base.py               |   6
-rw-r--r-- | numpy/lib/stride_tricks.py            |   4
-rw-r--r-- | numpy/lib/tests/test_function_base.py |  77
-rw-r--r-- | numpy/lib/tests/test_recfunctions.py  |  30
-rw-r--r-- | numpy/lib/tests/test_stride_tricks.py |   5
-rw-r--r-- | numpy/lib/utils.py                    |   1
9 files changed, 239 insertions, 71 deletions
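The bulk of this change is the new fweights and aweights keywords for np.cov (added for 1.10). As a rough usage sketch, based on the test data added to test_function_base.py below and assuming NumPy >= 1.10: integer frequency weights behave like repeating observation columns, while observation (analytic) weights only matter up to an overall scale factor.

import numpy as np

x = np.array([[0., 1., 2.],
              [2., 1., 0.]])   # two variables, three observations

# fweights are integer repeat counts: weighting the middle observation
# by 4 is equivalent to duplicating that column four times
repeated = np.array([[0., 1., 1., 1., 1., 2.],
                     [2., 1., 1., 1., 1., 0.]])
assert np.allclose(np.cov(x, fweights=[1, 4, 1]), np.cov(repeated))

# aweights express relative importance; a common scale factor cancels
assert np.allclose(np.cov(x, aweights=[1., 4., 1.]),
                   np.cov(x, aweights=[3., 12., 3.]))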
diff --git a/numpy/lib/function_base.py b/numpy/lib/function_base.py index d22e8c047..3826715e1 100644 --- a/numpy/lib/function_base.py +++ b/numpy/lib/function_base.py @@ -18,7 +18,7 @@ from numpy.core.umath import ( mod, exp, log10 ) from numpy.core.fromnumeric import ( - ravel, nonzero, sort, partition, mean + ravel, nonzero, sort, partition, mean, any, sum ) from numpy.core.numerictypes import typecodes, number from numpy.lib.twodim_base import diag @@ -776,6 +776,7 @@ def select(condlist, choicelist, default=0): # Now that the dtype is known, handle the deprecated select([], []) case if len(condlist) == 0: + # 2014-02-24, 1.9 warnings.warn("select with an empty condition list is not possible" "and will be deprecated", DeprecationWarning) @@ -809,6 +810,7 @@ def select(condlist, choicelist, default=0): 'invalid entry in choicelist: should be boolean ndarray') if deprecated_ints: + # 2014-02-24, 1.9 msg = "select condlists containing integer ndarrays is deprecated " \ "and will be removed in the future. Use `.astype(bool)` to " \ "convert to bools." @@ -1829,9 +1831,9 @@ class vectorize(object): return _res -def cov(m, y=None, rowvar=1, bias=0, ddof=None): +def cov(m, y=None, rowvar=1, bias=0, ddof=None, fweights=None, aweights=None): """ - Estimate a covariance matrix, given data. + Estimate a covariance matrix, given data and weights. Covariance indicates the level to which two variables vary together. If we examine N-dimensional samples, :math:`X = [x_1, x_2, ... x_N]^T`, @@ -1839,6 +1841,8 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): :math:`x_i` and :math:`x_j`. The element :math:`C_{ii}` is the variance of :math:`x_i`. + See the notes for an outline of the algorithm. + Parameters ---------- m : array_like @@ -1846,23 +1850,35 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): Each row of `m` represents a variable, and each column a single observation of all those variables. Also see `rowvar` below. y : array_like, optional - An additional set of variables and observations. `y` has the same - form as that of `m`. + An additional set of variables and observations. `y` has the same form + as that of `m`. rowvar : int, optional If `rowvar` is non-zero (default), then each row represents a variable, with observations in the columns. Otherwise, the relationship is transposed: each column represents a variable, while the rows contain observations. bias : int, optional - Default normalization is by ``(N - 1)``, where ``N`` is the number of - observations given (unbiased estimate). If `bias` is 1, then - normalization is by ``N``. These values can be overridden by using - the keyword ``ddof`` in numpy versions >= 1.5. + Default normalization is by ``(N - 1)``, where ``N`` corresponds to the + number of observations given (unbiased estimate). If `bias` is 1, then + normalization is by ``N``. These values can be overridden by using the + keyword ``ddof`` in numpy versions >= 1.5. ddof : int, optional .. versionadded:: 1.5 - If not ``None`` normalization is by ``(N - ddof)``, where ``N`` is - the number of observations; this overrides the value implied by - ``bias``. The default value is ``None``. + If not ``None`` the default value implied by `bias` is overridden. + Note that ``ddof=1`` will return the unbiased estimate, even if both + `fweights` and `aweights` are specified, and ``ddof=0`` will return + the simple average. See the notes for the details. The default value + is ``None``. + fweights : array_like, int, optional + .. 
versionadded:: 1.10 + 1-D array of integer freguency weights; the number of times each + observation vector should be repeated. + aweights : array_like, optional + .. versionadded:: 1.10 + 1-D array of observation vector weights. These relative weights are + typically large for observations considered "important" and smaller for + observations considered less "important". If ``ddof=0`` the array of + weights can be used to assign probabilities to observation vectors. Returns ------- @@ -1873,6 +1889,22 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): -------- corrcoef : Normalized covariance matrix + Notes + ----- + Assume that the observations are in the columns of the observation + array `m` and let ``f = fweights`` and ``a = aweights`` for brevity. The + steps to compute the weighted covariance are as follows:: + + >>> w = f * a + >>> v1 = np.sum(w) + >>> v2 = np.sum(w * a) + >>> m -= np.sum(m * w, axis=1, keepdims=True) / v1 + >>> cov = np.dot(m * w, m.T) * v1 / (v1**2 - ddof * v2) + + Note that when ``a == 1``, the normalization factor + ``v1 / (v1**2 - ddof * v2)`` goes over to ``1 / (np.sum(f) - ddof)`` + as it should. + Examples -------- Consider two variables, :math:`x_0` and :math:`x_1`, which @@ -1921,36 +1953,78 @@ def cov(m, y=None, rowvar=1, bias=0, ddof=None): y = np.asarray(y) dtype = np.result_type(m, y, np.float64) X = array(m, ndmin=2, dtype=dtype) + if rowvar == 0 and X.shape[0] != 1: + X = X.T + if X.shape[0] == 0: + return np.array([]).reshape(0, 0) + if y is not None: + y = array(y, copy=False, ndmin=2, dtype=dtype) + if rowvar == 0 and y.shape[0] != 1: + y = y.T + X = np.vstack((X, y)) - if X.shape[0] == 1: - rowvar = 1 - if rowvar: - N = X.shape[1] - axis = 0 - else: - N = X.shape[0] - axis = 1 - - # check ddof if ddof is None: if bias == 0: ddof = 1 else: ddof = 0 - fact = float(N - ddof) + + # Get the product of frequencies and weights + w = None + if fweights is not None: + fweights = np.asarray(fweights, dtype=np.float) + if not np.all(fweights == np.around(fweights)): + raise TypeError( + "fweights must be integer") + if fweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional fweights") + if fweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and fweights") + if any(fweights < 0): + raise ValueError( + "fweights cannot be negative") + w = fweights + if aweights is not None: + aweights = np.asarray(aweights, dtype=np.float) + if aweights.ndim > 1: + raise RuntimeError( + "cannot handle multidimensional aweights") + if aweights.shape[0] != X.shape[1]: + raise RuntimeError( + "incompatible numbers of samples and aweights") + if any(aweights < 0): + raise ValueError( + "aweights cannot be negative") + if w is None: + w = aweights + else: + w *= aweights + + avg, w_sum = average(X, axis=1, weights=w, returned=True) + w_sum = w_sum[0] + + # Determine the normalization + if w is None: + fact = float(X.shape[1] - ddof) + elif ddof == 0: + fact = w_sum + elif aweights is None: + fact = w_sum - ddof + else: + fact = w_sum - ddof*sum(w*aweights)/w_sum + if fact <= 0: warnings.warn("Degrees of freedom <= 0 for slice", RuntimeWarning) fact = 0.0 - if y is not None: - y = array(y, copy=False, ndmin=2, dtype=dtype) - X = concatenate((X, y), axis) - - X -= X.mean(axis=1-axis, keepdims=True) - if not rowvar: - return (dot(X.T, X.conj()) / fact).squeeze() + X -= avg[:, None] + if w is None: + X_T = X.T else: - return (dot(X, X.T.conj()) / fact).squeeze() + X_T = (X*w).T + return (dot(X, X_T.conj())/fact).squeeze() def 
corrcoef(x, y=None, rowvar=1, bias=np._NoValue, ddof=np._NoValue): @@ -2003,6 +2077,7 @@ def corrcoef(x, y=None, rowvar=1, bias=np._NoValue, ddof=np._NoValue): safely ignored in this and previous versions of numpy. """ if bias is not np._NoValue or ddof is not np._NoValue: + # 2015-03-15, 1.10 warnings.warn('bias and ddof have no affect and are deprecated', DeprecationWarning) c = cov(x, y, rowvar) @@ -2248,7 +2323,7 @@ def hanning(M): .. math:: w(n) = 0.5 - 0.5cos\\left(\\frac{2\\pi{n}}{M-1}\\right) \\qquad 0 \\leq n \\leq M-1 - The Hanning was named for Julius van Hann, an Austrian meteorologist. + The Hanning was named for Julius von Hann, an Austrian meteorologist. It is also known as the Cosine Bell. Some authors prefer that it be called a Hann window, to help avoid confusion with the very similar Hamming window. @@ -3554,6 +3629,7 @@ def delete(arr, obj, axis=None): ndim = arr.ndim axis = ndim - 1 if ndim == 0: + # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " "from delete and raise an error", DeprecationWarning) @@ -3648,6 +3724,7 @@ def delete(arr, obj, axis=None): if not np.can_cast(obj, intp, 'same_kind'): # obj.size = 1 special case always failed and would just # give superfluous warnings. + # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in delete will result in an " "error in the future", DeprecationWarning) @@ -3657,6 +3734,7 @@ def delete(arr, obj, axis=None): # Test if there are out of bound indices, this is deprecated inside_bounds = (obj < N) & (obj >= -N) if not inside_bounds.all(): + # 2013-09-24, 1.9 warnings.warn( "in the future out of bounds indices will raise an error " "instead of being ignored by `numpy.delete`.", @@ -3715,7 +3793,7 @@ def insert(arr, obj, values, axis=None): See Also -------- append : Append elements at the end of an array. - concatenate : Join a sequence of arrays together. + concatenate : Join a sequence of arrays along an existing axis. delete : Delete elements from an array. Notes @@ -3789,6 +3867,7 @@ def insert(arr, obj, values, axis=None): if (axis < 0): axis += ndim if (ndim == 0): + # 2013-09-24, 1.9 warnings.warn( "in the future the special handling of scalars will be removed " "from insert and raise an error", DeprecationWarning) @@ -3860,6 +3939,7 @@ def insert(arr, obj, values, axis=None): indices = indices.astype(intp) if not np.can_cast(indices, intp, 'same_kind'): + # 2013-09-24, 1.9 warnings.warn( "using a non-integer array as obj in insert will result in an " "error in the future", DeprecationWarning) diff --git a/numpy/lib/index_tricks.py b/numpy/lib/index_tricks.py index 752407f18..030261dee 100644 --- a/numpy/lib/index_tricks.py +++ b/numpy/lib/index_tricks.py @@ -404,7 +404,7 @@ class RClass(AxisConcatenator): See Also -------- - concatenate : Join a sequence of arrays together. + concatenate : Join a sequence of arrays along an existing axis. c_ : Translates slice objects to concatenation along the second axis. 
Examples diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index ec89397a0..078c6d7ca 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1301,13 +1301,12 @@ def fromregex(file, regexp, dtype): def genfromtxt(fname, dtype=float, comments='#', delimiter=None, - skiprows=0, skip_header=0, skip_footer=0, converters=None, - missing='', missing_values=None, filling_values=None, - usecols=None, names=None, - excludelist=None, deletechars=None, replace_space='_', - autostrip=False, case_sensitive=True, defaultfmt="f%i", - unpack=None, usemask=False, loose=True, invalid_raise=True, - max_rows=None): + skip_header=0, skip_footer=0, converters=None, + missing_values=None, filling_values=None, usecols=None, + names=None, excludelist=None, deletechars=None, + replace_space='_', autostrip=False, case_sensitive=True, + defaultfmt="f%i", unpack=None, usemask=False, loose=True, + invalid_raise=True, max_rows=None): """ Load data from a text file, with missing values handled as specified. @@ -1332,8 +1331,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, whitespaces act as delimiter. An integer or sequence of integers can also be provided as width(s) of each field. skiprows : int, optional - `skiprows` was deprecated in numpy 1.5, and will be removed in - numpy 2.0. Please use `skip_header` instead. + `skiprows` was removed in numpy 1.10. Please use `skip_header` instead. skip_header : int, optional The number of lines to skip at the beginning of the file. skip_footer : int, optional @@ -1343,8 +1341,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, The converters can also be used to provide a default value for missing data: ``converters = {3: lambda s: float(s or 0)}``. missing : variable, optional - `missing` was deprecated in numpy 1.5, and will be removed in - numpy 2.0. Please use `missing_values` instead. + `missing` was removed in numpy 1.10. Please use `missing_values` + instead. missing_values : variable, optional The set of strings corresponding to missing data. filling_values : variable, optional @@ -1475,8 +1473,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, comments = asbytes(comments) if isinstance(delimiter, unicode): delimiter = asbytes(delimiter) - if isinstance(missing, unicode): - missing = asbytes(missing) if isinstance(missing_values, (unicode, list, tuple)): missing_values = asbytes_nested(missing_values) @@ -1513,13 +1509,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, case_sensitive=case_sensitive, replace_space=replace_space) - # Get the first valid lines after the first skiprows ones .. - if skiprows: - warnings.warn( - "The use of `skiprows` is deprecated, it will be removed in " - "numpy 2.0.\nPlease use `skip_header` instead.", - DeprecationWarning) - skip_header = skiprows # Skip the first `skip_header` rows for i in range(skip_header): next(fhd) @@ -1648,16 +1637,6 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, for entry in missing_values: entry.extend([str(user_missing_values)]) - # Process the deprecated `missing` - if missing != asbytes(''): - warnings.warn( - "The use of `missing` is deprecated, it will be removed in " - "Numpy 2.0.\nPlease use `missing_values` instead.", - DeprecationWarning) - values = [str(_) for _ in missing.split(asbytes(","))] - for entry in missing_values: - entry.extend(values) - # Process the filling_values ............................... 
# Rename the input for convenience user_filling_values = filling_values diff --git a/numpy/lib/shape_base.py b/numpy/lib/shape_base.py index 011434dda..280765df8 100644 --- a/numpy/lib/shape_base.py +++ b/numpy/lib/shape_base.py @@ -338,9 +338,10 @@ def dstack(tup): See Also -------- + stack : Join a sequence of arrays along a new axis. vstack : Stack along first axis. hstack : Stack along second axis. - concatenate : Join arrays. + concatenate : Join a sequence of arrays along an existing axis. dsplit : Split array along third axis. Notes @@ -477,7 +478,8 @@ def split(ary,indices_or_sections,axis=0): hsplit : Split array into multiple sub-arrays horizontally (column-wise). vsplit : Split array into multiple sub-arrays vertically (row wise). dsplit : Split array into multiple sub-arrays along the 3rd axis (depth). - concatenate : Join arrays together. + concatenate : Join a sequence of arrays along an existing axis. + stack : Join a sequence of arrays along a new axis. hstack : Stack arrays in sequence horizontally (column wise). vstack : Stack arrays in sequence vertically (row wise). dstack : Stack arrays in sequence depth wise (along third dimension). diff --git a/numpy/lib/stride_tricks.py b/numpy/lib/stride_tricks.py index c05d56e2f..416776ff4 100644 --- a/numpy/lib/stride_tricks.py +++ b/numpy/lib/stride_tricks.py @@ -126,6 +126,10 @@ def _broadcast_shape(*args): b = np.broadcast(*args[:32]) # unfortunately, it cannot handle 32 or more arguments directly for pos in range(32, len(args), 31): + # ironically, np.broadcast does not properly handle np.broadcast + # objects (it treats them as scalars) + # use broadcasting to avoid allocating the full array + b = broadcast_to(0, b.shape) b = np.broadcast(b, *args[pos:(pos + 31)]) return b.shape diff --git a/numpy/lib/tests/test_function_base.py b/numpy/lib/tests/test_function_base.py index 12f9d414b..ad71fd3fa 100644 --- a/numpy/lib/tests/test_function_base.py +++ b/numpy/lib/tests/test_function_base.py @@ -1391,9 +1391,20 @@ class TestCorrCoef(TestCase): class TestCov(TestCase): + x1 = np.array([[0, 2], [1, 1], [2, 0]]).T + res1 = np.array([[1., -1.], [-1., 1.]]) + x2 = np.array([0.0, 1.0, 2.0], ndmin=2) + frequencies = np.array([1, 4, 1]) + x2_repeats = np.array([[0.0], [1.0], [1.0], [1.0], [1.0], [2.0]]).T + res2 = np.array([[0.4, -0.4], [-0.4, 0.4]]) + unit_frequencies = np.ones(3, dtype=np.integer) + weights = np.array([1.0, 4.0, 1.0]) + res3 = np.array([[2./3., -2./3.], [-2./3., 2./3.]]) + unit_weights = np.ones(3) + x3 = np.array([0.3942, 0.5969, 0.7730, 0.9918, 0.7964]) + def test_basic(self): - x = np.array([[0, 2], [1, 1], [2, 0]]).T - assert_allclose(cov(x), np.array([[1., -1.], [-1., 1.]])) + assert_allclose(cov(self.x1), self.res1) def test_complex(self): x = np.array([[1, 2, 3], [1j, 2j, 3j]]) @@ -1414,11 +1425,67 @@ class TestCov(TestCase): np.array([[np.nan, np.nan], [np.nan, np.nan]])) def test_wrong_ddof(self): - x = np.array([[0, 2], [1, 1], [2, 0]]).T with warnings.catch_warnings(record=True): warnings.simplefilter('always', RuntimeWarning) - assert_array_equal(cov(x, ddof=5), - np.array([[np.inf, -np.inf], [-np.inf, np.inf]])) + assert_array_equal(cov(self.x1, ddof=5), + np.array([[np.inf, -np.inf], + [-np.inf, np.inf]])) + + def test_1D_rowvar(self): + assert_allclose(cov(self.x3), cov(self.x3, rowvar=0)) + y = np.array([0.0780, 0.3107, 0.2111, 0.0334, 0.8501]) + assert_allclose(cov(self.x3, y), cov(self.x3, y, rowvar=0)) + + def test_1D_variance(self): + assert_allclose(cov(self.x3, ddof=1), np.var(self.x3, ddof=1)) + 
+ def test_fweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies), + self.res1) + nonint = self.frequencies + 0.5 + assert_raises(TypeError, cov, self.x1, fweights=nonint) + f = np.ones((2, 3), dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = np.ones(2, dtype=np.integer) + assert_raises(RuntimeError, cov, self.x1, fweights=f) + f = -1*np.ones(3, dtype=np.integer) + assert_raises(ValueError, cov, self.x1, fweights=f) + + def test_aweights(self): + assert_allclose(cov(self.x1, aweights=self.weights), self.res3) + assert_allclose(cov(self.x1, aweights=3.0*self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, aweights=self.unit_weights), self.res1) + w = np.ones((2, 3)) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = np.ones(2) + assert_raises(RuntimeError, cov, self.x1, aweights=w) + w = -1.0*np.ones(3) + assert_raises(ValueError, cov, self.x1, aweights=w) + + def test_unit_fweights_and_aweights(self): + assert_allclose(cov(self.x2, fweights=self.frequencies, + aweights=self.unit_weights), + cov(self.x2_repeats)) + assert_allclose(cov(self.x1, fweights=self.frequencies, + aweights=self.unit_weights), + self.res2) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.weights), + self.res3) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=3.0*self.weights), + cov(self.x1, aweights=self.weights)) + assert_allclose(cov(self.x1, fweights=self.unit_frequencies, + aweights=self.unit_weights), + self.res1) class Test_I0(TestCase): diff --git a/numpy/lib/tests/test_recfunctions.py b/numpy/lib/tests/test_recfunctions.py index 51a2077eb..13e75cbd0 100644 --- a/numpy/lib/tests/test_recfunctions.py +++ b/numpy/lib/tests/test_recfunctions.py @@ -700,6 +700,36 @@ class TestJoinBy2(TestCase): assert_equal(test.dtype, control.dtype) assert_equal(test, control) +class TestAppendFieldsObj(TestCase): + """ + Test append_fields with arrays containing objects + """ + # https://github.com/numpy/numpy/issues/2346 + + def setUp(self): + from datetime import date + self.data = dict(obj=date(2000, 1, 1)) + + def test_append_to_objects(self): + "Test append_fields when the base array contains objects" + obj = self.data['obj'] + x = np.array([(obj, 1.), (obj, 2.)], + dtype=[('A', object), ('B', float)]) + y = np.array([10, 20], dtype=int) + test = append_fields(x, 'C', data=y, usemask=False) + control = np.array([(obj, 1.0, 10), (obj, 2.0, 20)], + dtype=[('A', object), ('B', float), ('C', int)]) + assert_equal(test, control) + + def test_append_with_objects(self): + "Test append_fields when the appended data contains objects" + obj = self.data['obj'] + x = np.array([(10, 1.), (20, 2.)], dtype=[('A', int), ('B', float)]) + y = np.array([obj, obj], dtype=object) + test = append_fields(x, 'C', data=y, dtypes=object, usemask=False) + control = np.array([(10, 1.0, obj), (20, 2.0, obj)], + dtype=[('A', int), ('B', float), ('C', object)]) + assert_equal(test, control) if __name__ == '__main__': run_module_suite() diff --git a/numpy/lib/tests/test_stride_tricks.py b/numpy/lib/tests/test_stride_tricks.py index e079e0bf4..44112c970 100644 --- a/numpy/lib/tests/test_stride_tricks.py +++ b/numpy/lib/tests/test_stride_tricks.py 
@@ -273,6 +273,11 @@ def test_broadcast_shape(): assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 32)), (1, 2)) assert_equal(_broadcast_shape(*([np.ones((1, 2))] * 100)), (1, 2)) + # regression tests for gh-5862 + assert_equal(_broadcast_shape(*([np.ones(2)] * 32 + [1])), (2,)) + bad_args = [np.ones(2)] * 32 + [np.ones(3)] * 32 + assert_raises(ValueError, lambda: _broadcast_shape(*bad_args)) + def test_as_strided(): a = np.array([None]) diff --git a/numpy/lib/utils.py b/numpy/lib/utils.py index 519d0e9b9..5a92cc316 100644 --- a/numpy/lib/utils.py +++ b/numpy/lib/utils.py @@ -1011,6 +1011,7 @@ class SafeEval(object): """ def __init__(self): + # 2014-10-15, 1.10 warnings.warn("SafeEval is deprecated in 1.10 and will be removed.", DeprecationWarning)
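For reference, the _broadcast_shape change in stride_tricks.py works around two limits of np.broadcast: it accepts at most 32 arguments, and it treats an np.broadcast object passed back in as a scalar. The following is only an illustrative sketch of that chaining idea, not the library code verbatim; chained_broadcast_shape is a hypothetical name, and it relies on the public np.broadcast_to added in 1.10.

import numpy as np

def chained_broadcast_shape(*args):
    # Sketch of the >32-argument chaining used by
    # numpy.lib.stride_tricks._broadcast_shape in this commit.
    b = np.broadcast(*args[:32])
    for pos in range(32, len(args), 31):
        # np.broadcast would treat `b` itself as a scalar, so stand in a
        # cheap zero-strided array carrying the shape found so far
        b = np.broadcast(np.broadcast_to(0, b.shape), *args[pos:pos + 31])
    return b.shape

# mirrors the gh-5862 regression test added above
assert chained_broadcast_shape(*([np.ones(2)] * 32 + [1])) == (2,)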