summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
Diffstat (limited to 'numpy')
-rw-r--r--numpy/lib/polynomial.py30
-rw-r--r--numpy/lib/tests/test_polynomial.py50
2 files changed, 59 insertions, 21 deletions
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 81af185eb..e3defdca2 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -463,9 +463,14 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
w : array_like, shape (M,), optional
Weights to apply to the y-coordinates of the sample points. For
gaussian uncertainties, use 1/sigma (not 1/sigma**2).
- cov : bool, optional
- Return the estimate and the covariance matrix of the estimate
- If full is True, then cov is not returned.
+ cov : bool or str, optional
+ If given and not `False`, return not just the estimate but also its
+ covariance matrix. By default, the covariance are scaled by
+ chi2/sqrt(N-dof), i.e., the weights are presumed to be unreliable
+ except in a relative sense and everything is scaled such that the
+ reduced chi2 is unity. This scaling is omitted if ``cov='unscaled'``,
+ as is relevant for the case that the weights are 1/sigma**2, with
+ sigma known to be a reliable estimate of the uncertainty.
Returns
-------
@@ -633,14 +638,17 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
elif cov:
Vbase = inv(dot(lhs.T, lhs))
Vbase /= NX.outer(scale, scale)
- # Some literature ignores the extra -2.0 factor in the denominator, but
- # it is included here because the covariance of Multivariate Student-T
- # (which is implied by a Bayesian uncertainty analysis) includes it.
- # Plus, it gives a slightly more conservative estimate of uncertainty.
- if len(x) <= order + 2:
- raise ValueError("the number of data points must exceed order + 2 "
- "for Bayesian estimate the covariance matrix")
- fac = resids / (len(x) - order - 2.0)
+ if cov == "unscaled":
+ fac = 1
+ else:
+ if len(x) <= order:
+ raise ValueError("the number of data points must exceed order "
+ "to scale the covariance matrix")
+ # note, this used to be: fac = resids / (len(x) - order - 2.0)
+ # it was deciced that the "- 2" (originally justified by "Bayesian
+ # uncertainty analysis") is not was the user expects
+ # (see gh-11196 and gh-11197)
+ fac = resids / (len(x) - order)
if y.ndim == 1:
return c, Vbase * fac
else:
diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py
index 9f7c117a2..77414ba7c 100644
--- a/numpy/lib/tests/test_polynomial.py
+++ b/numpy/lib/tests/test_polynomial.py
@@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function
import numpy as np
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_almost_equal,
- assert_array_almost_equal, assert_raises
+ assert_array_almost_equal, assert_raises, assert_allclose
)
@@ -122,27 +122,34 @@ class TestPolynomial(object):
weights = np.arange(8, 1, -1)**2/7.0
# Check exception when too few points for variance estimate. Note that
- # the Bayesian estimate requires the number of data points to exceed
- # degree + 3.
+ # the estimate requires the number of data points to exceed
+ # degree + 1
assert_raises(ValueError, np.polyfit,
- [0, 1, 3], [0, 1, 3], deg=0, cov=True)
+ [1], [1], deg=0, cov=True)
# check 1D case
m, cov = np.polyfit(x, y+err, 2, cov=True)
est = [3.8571, 0.2857, 1.619]
assert_almost_equal(est, m, decimal=4)
- val0 = [[2.9388, -5.8776, 1.6327],
- [-5.8776, 12.7347, -4.2449],
- [1.6327, -4.2449, 2.3220]]
+ val0 = [[ 1.4694, -2.9388, 0.8163],
+ [-2.9388, 6.3673, -2.1224],
+ [ 0.8163, -2.1224, 1.161 ]]
assert_almost_equal(val0, cov, decimal=4)
m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
- val = [[8.7929, -10.0103, 0.9756],
- [-10.0103, 13.6134, -1.8178],
- [0.9756, -1.8178, 0.6674]]
+ val = [[ 4.3964, -5.0052, 0.4878],
+ [-5.0052, 6.8067, -0.9089],
+ [ 0.4878, -0.9089, 0.3337]]
assert_almost_equal(val, cov2, decimal=4)
+ m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled")
+ assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4)
+ val = [[ 0.1473, -0.1677, 0.0163],
+ [-0.1677, 0.228 , -0.0304],
+ [ 0.0163, -0.0304, 0.0112]]
+ assert_almost_equal(val, cov3, decimal=4)
+
# check 2D (n,1) case
y = y[:, np.newaxis]
c = c[:, np.newaxis]
@@ -158,6 +165,29 @@ class TestPolynomial(object):
assert_almost_equal(val0, cov[:, :, 0], decimal=4)
assert_almost_equal(val0, cov[:, :, 1], decimal=4)
+ # check order 1 (deg=0) case, were the analytic results are simple
+ np.random.seed(123)
+ y = np.random.normal(size=(4, 10000))
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True)
+ # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5.
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # Without scaling, since reduced chi2 is 1, the result should be the same.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]),
+ deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.5)
+ # If we estimate our errors wrong, no change with scaling:
+ w = np.full(y.shape[0], 1./0.5)
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True)
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # But if we do not scale, our estimate for the error in the mean will
+ # differ.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.25)
+
def test_objects(self):
from decimal import Decimal
p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')])