Add a rcond parameter to the polyfit function and give it the double precision

default value that dgelsd uses (rcondx=-1) on the principle of least surprise. Values of rcond less than this can also be useful, so a warning message and a bit of explanation was added to the documentation. The default value used by lstsq was set to the default (-1), and rcond in pinv has a default value of 1e-15.
author: Charles Harris <charlesr.harris@gmail.com> 2006-10-13 17:08:24 +0000
committer: Charles Harris <charlesr.harris@gmail.com> 2006-10-13 17:08:24 +0000
commit: 4a69a27cf3edc60a1304110e5b9bf4090b8c6ec5 (patch)
tree: 69eda45e8b3a73382597b7ef1b1eed3b876d9716
parent: 9361c17caa2816d755e2ce2a5faa860ea2e675c2 (diff)
download: numpy-4a69a27cf3edc60a1304110e5b9bf4090b8c6ec5.tar.gz
2 files changed, 51 insertions, 38 deletions
diff --git a/numpy/lib/polynomial.py b/numpy/lib/polynomial.py
index 9911c878f..c87d60f51 100644
--- a/numpy/lib/polynomial.py
+++ b/numpy/lib/polynomial.py
@@ -30,13 +30,13 @@ def _eigvals(arg):
         get_linalg_funcs()
         return eigvals(arg)
 
-def _lstsq(X, y):
+def _lstsq(X, y, rcond):
     "Do least squares on the arguments"
     try:
-        return lstsq(X, y)
+        return lstsq(X, y, rcond)
     except TypeError:
         get_linalg_funcs()
-        return lstsq(X, y)
+        return lstsq(X, y, rcond)
 
 def poly(seq_of_zeros):
     """ Return a sequence representing a polynomial given a sequence of roots.
@@ -169,10 +169,10 @@ def polyder(p, m=1):
             val = poly1d(val)
         return val
 
-def polyfit(x, y, N):
+def polyfit(x, y, N, rcond=-1):
     """
 
-    Do a best fit polynomial of order N of y to x.  Return value is a
+    Do a best fit polynomial of degree N of y to x.  Return value is a
     vector of polynomial coefficients [pk ... p1 p0].  Eg, for N=2
 
       p2*x0^2 +  p1*x0 + p0 = y1
@@ -195,7 +195,22 @@ def polyfit(x, y, N):
 
       p = (XT*X)^-1 * XT * y
 
-    where XT is the transpose of X and -1 denotes the inverse.
+    where XT is the transpose of X gand -1 denotes the inverse. However, this
+    method is susceptible to rounding errors and generally the singular value
+    decomposition is preferred and that is the method used here. The singular
+    value method takes a paramenter, 'rcond', which sets a limit on the
+    relative size of the smallest singular value to be used in solving the
+    equation. The default value of rcond is the double precision machine
+    precision as the actual solution is carried out in double precision.
+
+    WARNING: Power series fits are full of pitfalls for the unwary once the
+    degree of the fit get up around 4 or 5. Computation of the polynomial
+    values are sensitive to coefficient errors and the Vandermonde matrix is
+    ill conditioned. The knobs available to tune the fit are degree and rcond.
+    The rcond knob is a bit flaky and it can be useful to use values of rcond
+    less than the machine precision, 1e-24 for instance, but the quality of the
+    resulting fit needs to be checked against the data. The quality of
+    polynomial fits *can not* be taken for granted.
 
     For more info, see
     http://mathworld.wolfram.com/LeastSquaresFittingPolynomial.html,
@@ -205,12 +220,10 @@ def polyfit(x, y, N):
     See also polyval
 
     """
-    x = NX.asarray(x)+0.
-    y = NX.asarray(y)+0.
-    y = NX.reshape(y, (len(y), 1))
-    X = vander(x, N+1)
-    c, resids, rank, s = _lstsq(X, y)
-    c.shape = (N+1,)
+    x = NX.asarray(x) + 0.0
+    y = NX.asarray(y) + 0.0
+    v = vander(x, N+1)
+    c, resids, rank, s = _lstsq(v, y, rcond)
     return c
 
 
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index 9f527c7f4..2d72c7a8b 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -140,12 +140,12 @@ def solvetensor(a, b, axes=None):
             allaxes.remove(k)
             allaxes.insert(an, k)
         a = a.transpose(allaxes)
-        
+
     oldshape = a.shape[-(an-b.ndim):]
     prod = 1
     for k in oldshape:
         prod *= k
-    
+
     a = a.reshape(-1,prod)
     b = b.ravel()
     res = solve(a, b)
@@ -184,17 +184,17 @@ def solve(a, b):
 def invtensor(a, ind=2):
     """Find the inverse tensor.
 
-    ind > 0 ==> first (ind) indices of a are summed over 
+    ind > 0 ==> first (ind) indices of a are summed over
     ind < 0 ==> last (-ind) indices of a are summed over
 
     if ind is a list, then it specifies the summed over axes
 
     When the inv tensor and the tensor are summed over the
-    indicated axes a separable identity tensor remains. 
+    indicated axes a separable identity tensor remains.
 
     The inverse has the summed over axes at the end.
     """
-    
+
     a = asarray(a)
     oldshape = a.shape
     prod = 1
@@ -217,7 +217,7 @@ def invtensor(a, ind=2):
     a = a.reshape(-1,prod)
     ia = inv(a)
     return ia.reshape(*invshape)
-    
+
 
 # Matrix inversion
 
@@ -250,7 +250,7 @@ def cholesky(a):
 
 def qr(a, mode='full'):
     """cacluates A=QR, Q orthonormal, R upper triangular
-    
+
     mode:  'full' --> (Q,R)
            'r'    --> R
            'economic' --> A2 where the diagonal + upper triangle
@@ -267,8 +267,8 @@ def qr(a, mode='full'):
         routine_name = 'zgeqrf'
     else:
         lapack_routine = lapack_lite.dgeqrf
-        routine_name = 'dgeqrf'        
-        
+        routine_name = 'dgeqrf'
+
     # calculate optimal size of work data 'work'
     lwork = 1
     work = zeros((lwork,), t)
@@ -307,7 +307,7 @@ def qr(a, mode='full'):
 
     if isComplexType(t):
         lapack_routine = lapack_lite.zungqr
-        routine_name = 'zungqr'                
+        routine_name = 'zungqr'
     else:
         lapack_routine = lapack_lite.dorgqr
         routine_name = 'dorgqr'
@@ -326,7 +326,7 @@ def qr(a, mode='full'):
 
     if results['info'] > 0:
         raise LinAlgError, '%s returns %d' % (routine_name, results['info'])
-    
+
     q = a[:mn,:].transpose()
 
     if (q.dtype != result_t):
@@ -476,7 +476,7 @@ eigenvalue u[i].  Satisfies the equation dot(a, v[:,i]) = u[i]*v[:,i]
                 v[ind[2*i]] = vr[ind[2*i]] + 1j*vr[ind[2*i+1]]
                 v[ind[2*i+1]] = vr[ind[2*i]] - 1j*vr[ind[2*i+1]]
             result_t = _complexType(result_t)
-            
+
     if results['info'] > 0:
         raise LinAlgError, 'Eigenvalues did not converge'
     vt = v.transpose().astype(result_t)
@@ -534,7 +534,7 @@ def svd(a, full_matrices=1, compute_uv=1):
     u,s,vh = svd(a)
 
     If a is an M x N array, then the svd produces a factoring of the array
-    into two unitary (orthogonal) 2-d arrays u (MxM) and vh (NxN) and a 
+    into two unitary (orthogonal) 2-d arrays u (MxM) and vh (NxN) and a
     min(M,N)-length array of singular values such that
 
                      a == dot(u,dot(S,vh))
@@ -613,10 +613,10 @@ def svd(a, full_matrices=1, compute_uv=1):
 
 # Generalized inverse
 
-def pinv(a, rcond = 1.e-10):
+def pinv(a, rcond=1e-15 ):
     """Return the (Moore-Penrose) pseudo-inverse of a 2-d array
 
-    This method computes the generalized inverse using the 
+    This method computes the generalized inverse using the
     singular-value decomposition and all singular values larger than
     rcond of the largest.
     """
@@ -660,18 +660,18 @@ def det(a):
 
 # Linear Least Squares
 
-def lstsq(a, b, rcond=1.e-10):
+def lstsq(a, b, rcond=-1):
     """returns x,resids,rank,s
-where x minimizes 2-norm(|b - Ax|)
-      resids is the sum square residuals
-      rank is the rank of A
-      s is the rank of the singular values of A in descending order
-
-If b is a matrix then x is also a matrix with corresponding columns.
-If the rank of A is less than the number of columns of A or greater than
-the number of rows, then residuals will be returned as an empty array
-otherwise resids = sum((b-dot(A,x)**2).
-Singular values less than s[0]*rcond are treated as zero.
+    where x minimizes 2-norm(|b - Ax|)
+    resids is the sum square residuals
+    rank is the rank of A
+    s is the rank of the singular values of A in descending order
+
+    If b is a matrix then x is also a matrix with corresponding columns.
+    If the rank of A is less than the number of columns of A or greater than
+    the number of rows, then residuals will be returned as an empty array
+    otherwise resids = sum((b-dot(A,x)**2).
+    Singular values less than s[0]*rcond are treated as zero.
 """
     import math
     a = asarray(a)
author	Charles Harris <charlesr.harris@gmail.com>	2006-10-13 17:08:24 +0000
committer	Charles Harris <charlesr.harris@gmail.com>	2006-10-13 17:08:24 +0000
commit	4a69a27cf3edc60a1304110e5b9bf4090b8c6ec5 (patch)
tree	69eda45e8b3a73382597b7ef1b1eed3b876d9716
parent	9361c17caa2816d755e2ce2a5faa860ea2e675c2 (diff)
download	numpy-4a69a27cf3edc60a1304110e5b9bf4090b8c6ec5.tar.gz