-rw-r--r--  doc/release/1.16.0-notes.rst        18
-rw-r--r--  numpy/lib/polynomial.py             30
-rw-r--r--  numpy/lib/tests/test_polynomial.py  50
3 files changed, 77 insertions, 21 deletions
diff --git a/doc/release/1.16.0-notes.rst b/doc/release/1.16.0-notes.rst
index 1add0983b..b77aa9698 100644
--- a/doc/release/1.16.0-notes.rst
+++ b/doc/release/1.16.0-notes.rst
@@ -251,6 +251,16 @@ single elementary function for four related but different signatures,
The ``out`` argument to these functions is now always tested for memory overlap
to avoid corrupted results when memory overlap occurs.
+New value ``unscaled`` for option ``cov`` in ``np.polyfit``
+-----------------------------------------------------------
+A further possible value has been added to the ``cov`` parameter of the
+``np.polyfit`` function. With ``cov='unscaled'`` the scaling of the covariance
+matrix is disabled completely (similar to setting ``absolute_sigma=True`` in
+``scipy.optimize.curve_fit``). This is useful, for example, when the weights
+are given by 1/sigma, with sigma being the (known) standard errors of the
+(Gaussian distributed) data points; in that case the unscaled matrix is
+already a correct estimate of the covariance matrix.
+
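A minimal usage sketch (not part of the patch; the data below are made up for illustration), for the case where the standard errors ``sigma`` of the data points are known:

    import numpy as np

    x = np.linspace(0.0, 1.0, 20)
    sigma = np.full_like(x, 0.1)                 # known 1-sigma uncertainties
    rng = np.random.RandomState(0)
    y = 2.0*x + 0.3 + sigma*rng.standard_normal(x.size)

    # weights are 1/sigma (not 1/sigma**2); with cov='unscaled' the returned
    # covariance matrix is not rescaled by chi2/dof
    coef, cov = np.polyfit(x, y, deg=1, w=1.0/sigma, cov='unscaled')
    coef_err = np.sqrt(np.diag(cov))             # standard errors of the coefficients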
Detailed docstrings for scalar numeric types
--------------------------------------------
The ``help`` function, when applied to numeric types such as `np.intc`,
@@ -372,6 +382,14 @@ if ``np.positive(array)`` raises a ``TypeError``. For ``ndarray``
subclasses that override the default ``__array_ufunc__`` implementation,
the ``TypeError`` is passed on.
+The scaling of the covariance matrix in ``np.polyfit`` is different
+-------------------------------------------------------------------
+So far, ``np.polyfit`` used a non-standard factor in the scaling of the
+covariance matrix. Namely, rather than using the standard chisq/(M-N), it
+scaled it with chisq/(M-N-2), where M is the number of data points and N is
+the number of parameters. This scaling is inconsistent with other fitting
+programs such as ``scipy.optimize.curve_fit`` and was changed to chisq/(M-N).
+
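For code that relied on the previous behaviour, the old matrix can be recovered by rescaling, since the two conventions differ only by a constant factor (a sketch, not part of the patch):

    import numpy as np

    x = np.arange(10.0)
    rng = np.random.RandomState(1)
    y = x**2 - 3.0*x + 1.0 + rng.normal(scale=0.5, size=x.size)
    deg = 2

    coef, cov_new = np.polyfit(x, y, deg, cov=True)   # now scaled by chisq/(M-N)
    M, N = len(x), deg + 1
    cov_old = cov_new * (M - N) / (M - N - 2.0)       # former chisq/(M-N-2) scaling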
``maximum`` and ``minimum`` no longer emit warnings
---------------------------------------------------
As part of code introduced in 1.10, ``float32`` and ``float64`` set invalid
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 81af185eb..e3defdca2 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -463,9 +463,14 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
w : array_like, shape (M,), optional
Weights to apply to the y-coordinates of the sample points. For
gaussian uncertainties, use 1/sigma (not 1/sigma**2).
- cov : bool, optional
- Return the estimate and the covariance matrix of the estimate
- If full is True, then cov is not returned.
+ cov : bool or str, optional
+ If given and not `False`, return not just the estimate but also its
+ covariance matrix. By default, the covariance is scaled by
+ chi2/dof, where dof = M - (deg + 1), i.e., the weights are presumed
+ to be unreliable except in a relative sense and everything is scaled
+ such that the reduced chi2 is unity. This scaling is omitted if
+ ``cov='unscaled'``, as is relevant for the case that the weights are
+ w = 1/sigma, with sigma known to be a reliable estimate of the
+ uncertainty.
Returns
-------
@@ -633,14 +638,17 @@ def polyfit(x, y, deg, rcond=None, full=False, w=None, cov=False):
elif cov:
Vbase = inv(dot(lhs.T, lhs))
Vbase /= NX.outer(scale, scale)
- # Some literature ignores the extra -2.0 factor in the denominator, but
- # it is included here because the covariance of Multivariate Student-T
- # (which is implied by a Bayesian uncertainty analysis) includes it.
- # Plus, it gives a slightly more conservative estimate of uncertainty.
- if len(x) <= order + 2:
- raise ValueError("the number of data points must exceed order + 2 "
- "for Bayesian estimate the covariance matrix")
- fac = resids / (len(x) - order - 2.0)
+ if cov == "unscaled":
+ fac = 1
+ else:
+ if len(x) <= order:
+ raise ValueError("the number of data points must exceed order "
+ "to scale the covariance matrix")
+ # note, this used to be: fac = resids / (len(x) - order - 2.0)
+ # it was decided that the "- 2" (originally justified by "Bayesian
+ # uncertainty analysis") is not what the user expects
+ # (see gh-11196 and gh-11197)
+ fac = resids / (len(x) - order)
if y.ndim == 1:
return c, Vbase * fac
else:
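A small self-contained check of the new default scaling (my own sketch, not part of the test suite): the returned covariance should match the normal-equations matrix inv(A.T A) times chisq/(M - N), with A the Vandermonde design matrix and N = deg + 1:

    import numpy as np

    x = np.linspace(0.0, 1.0, 20)
    rng = np.random.RandomState(0)
    y = 3.0*x**2 - x + 0.5 + 0.1*rng.standard_normal(x.size)
    deg = 2

    c, V = np.polyfit(x, y, deg, cov=True)

    A = np.vander(x, deg + 1)                     # design matrix
    resid = y - A.dot(c)
    chisq = resid.dot(resid)
    V_manual = np.linalg.inv(A.T.dot(A)) * chisq / (len(x) - (deg + 1))

    print(np.allclose(V, V_manual))               # expected: True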
diff --git a/numpy/lib/tests/test_polynomial.py b/numpy/lib/tests/test_polynomial.py
index 9f7c117a2..77414ba7c 100644
--- a/numpy/lib/tests/test_polynomial.py
+++ b/numpy/lib/tests/test_polynomial.py
@@ -3,7 +3,7 @@ from __future__ import division, absolute_import, print_function
import numpy as np
from numpy.testing import (
assert_, assert_equal, assert_array_equal, assert_almost_equal,
- assert_array_almost_equal, assert_raises
+ assert_array_almost_equal, assert_raises, assert_allclose
)
@@ -122,27 +122,34 @@ class TestPolynomial(object):
weights = np.arange(8, 1, -1)**2/7.0
# Check exception when too few points for variance estimate. Note that
- # the Bayesian estimate requires the number of data points to exceed
- # degree + 3.
+ # the estimate requires the number of data points to exceed
+ # degree + 1
assert_raises(ValueError, np.polyfit,
- [0, 1, 3], [0, 1, 3], deg=0, cov=True)
+ [1], [1], deg=0, cov=True)
# check 1D case
m, cov = np.polyfit(x, y+err, 2, cov=True)
est = [3.8571, 0.2857, 1.619]
assert_almost_equal(est, m, decimal=4)
- val0 = [[2.9388, -5.8776, 1.6327],
- [-5.8776, 12.7347, -4.2449],
- [1.6327, -4.2449, 2.3220]]
+ val0 = [[ 1.4694, -2.9388, 0.8163],
+ [-2.9388, 6.3673, -2.1224],
+ [ 0.8163, -2.1224, 1.161 ]]
assert_almost_equal(val0, cov, decimal=4)
m2, cov2 = np.polyfit(x, y+err, 2, w=weights, cov=True)
assert_almost_equal([4.8927, -1.0177, 1.7768], m2, decimal=4)
- val = [[8.7929, -10.0103, 0.9756],
- [-10.0103, 13.6134, -1.8178],
- [0.9756, -1.8178, 0.6674]]
+ val = [[ 4.3964, -5.0052, 0.4878],
+ [-5.0052, 6.8067, -0.9089],
+ [ 0.4878, -0.9089, 0.3337]]
assert_almost_equal(val, cov2, decimal=4)
+ m3, cov3 = np.polyfit(x, y+err, 2, w=weights, cov="unscaled")
+ assert_almost_equal([4.8927, -1.0177, 1.7768], m3, decimal=4)
+ val = [[ 0.1473, -0.1677, 0.0163],
+ [-0.1677, 0.228 , -0.0304],
+ [ 0.0163, -0.0304, 0.0112]]
+ assert_almost_equal(val, cov3, decimal=4)
+
# check 2D (n,1) case
y = y[:, np.newaxis]
c = c[:, np.newaxis]
@@ -158,6 +165,29 @@ class TestPolynomial(object):
assert_almost_equal(val0, cov[:, :, 0], decimal=4)
assert_almost_equal(val0, cov[:, :, 1], decimal=4)
+ # check order 1 (deg=0) case, where the analytic results are simple
+ np.random.seed(123)
+ y = np.random.normal(size=(4, 10000))
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, deg=0, cov=True)
+ # Should get sigma_mean = sigma/sqrt(N) = 1./sqrt(4) = 0.5.
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # Without scaling, since reduced chi2 is 1, the result should be the same.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=np.ones(y.shape[0]),
+ deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.5)
+ # If we estimate our errors wrong, no change with scaling:
+ w = np.full(y.shape[0], 1./0.5)
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov=True)
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_allclose(np.sqrt(cov.mean()), 0.5, atol=0.01)
+ # But if we do not scale, our estimate for the error in the mean will
+ # differ.
+ mean, cov = np.polyfit(np.zeros(y.shape[0]), y, w=w, deg=0, cov="unscaled")
+ assert_allclose(mean.std(), 0.5, atol=0.01)
+ assert_almost_equal(np.sqrt(cov.mean()), 0.25)
+
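The statistics behind the new ``deg=0`` checks can also be reproduced standalone (a sketch mirroring the test above, not part of the patch):

    import numpy as np

    np.random.seed(123)
    N = 4
    y = np.random.normal(size=(N, 10000))      # 10000 independent 4-point datasets
    x = np.zeros(N)

    # Default (scaled) covariance: the scatter of the fitted means and the
    # reported uncertainty both approach sigma/sqrt(N) = 0.5.
    mean, cov = np.polyfit(x, y, deg=0, cov=True)
    print(mean.std(), np.sqrt(cov.mean()))     # both ~0.5

    # With correct weights w = 1/sigma = 1 and cov="unscaled", the reported
    # uncertainty is exactly sqrt(1/N) = 0.5 for every dataset.
    mean, cov = np.polyfit(x, y, w=np.ones(N), deg=0, cov="unscaled")
    print(np.sqrt(cov.mean()))                 # 0.5

    # With mis-estimated weights w = 1/0.5 the unscaled estimate changes
    # (to 0.25), while the default scaled estimate does not.
    mean, cov = np.polyfit(x, y, w=np.full(N, 1.0/0.5), deg=0, cov="unscaled")
    print(np.sqrt(cov.mean()))                 # 0.25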
def test_objects(self):
from decimal import Decimal
p = np.poly1d([Decimal('4.0'), Decimal('3.0'), Decimal('2.0')])