author     RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
committer  RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
commit     ad56f16191adb1b45576d6c3214c19dffdd0eb34 (patch)
tree       1ed053a5f23d23768b84ee5953162fe18ff6b8b4
parent     e6f9a354ff3e2354d343493f6463328895f702c2 (diff)
download   numpy-ad56f16191adb1b45576d6c3214c19dffdd0eb34.tar.gz
ENH: re-write the implementation with new keywords method and use_factor
-rw-r--r--  changelog/14197.improvement.rst                 8
-rw-r--r--  numpy/random/generator.pyx                     79
-rw-r--r--  numpy/random/tests/test_generator_mt19937.py   87
3 files changed, 157 insertions, 17 deletions
diff --git a/changelog/14197.improvement.rst b/changelog/14197.improvement.rst
new file mode 100644
index 000000000..b822d1645
--- /dev/null
+++ b/changelog/14197.improvement.rst
@@ -0,0 +1,8 @@
+``method`` and ``use_factor`` options for `np.random.multivariate_normal`
+----------------------------------------------------------------------------
+A ``method`` option is now available for `np.random.multivariate_normal` with
+possible values {'svd', 'eigh', 'cholesky'}. To use it, write
+``np.random.multivariate_normal(..., method=<method>)``. A second option,
+``use_factor``, allows passing a precomputed factor ``A`` of the covariance
+matrix (with ``A @ A.T == cov``) to speed up computation for any given
+``method``. To use it, write ``np.random.multivariate_normal(..., use_factor=True)``.
\ No newline at end of file
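For orientation, a minimal usage sketch of the two new keywords, assuming a NumPy build that carries this patch (``use_factor`` exists only in this change, not in a released NumPy):

import numpy as np

rng = np.random.default_rng()
mean = [0.0, 0.0]
cov = [[2.0, 1.0], [1.0, 2.0]]

# Select the factorization method explicitly; 'cholesky' is the fastest.
samples = rng.multivariate_normal(mean, cov, size=1000, method='cholesky')

# Reuse a precomputed factor A with np.dot(A, A.T) == cov so the
# factorization is not repeated on every call.
A = np.linalg.cholesky(cov)
more = rng.multivariate_normal(mean, A, size=1000, method='cholesky',
                               use_factor=True)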
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index c7432d8c1..f6f0aba6a 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -3331,7 +3331,7 @@ cdef class Generator:
# Multivariate distributions:
def multivariate_normal(self, mean, cov, size=None, check_valid='warn',
- tol=1e-8):
+ tol=1e-8, method='svd', use_factor=False):
"""
multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8)
@@ -3361,6 +3361,19 @@ cdef class Generator:
tol : float, optional
Tolerance when checking the singular values in covariance matrix.
cov is cast to double before the check.
+ method : {'svd', 'eigh', 'cholesky'}, optional
+ The cov input is used to compute a factor matrix A such that
+ ``A @ A.T = cov``. This argument selects the method used to compute
+ the factor matrix A. The default method 'svd' is the slowest, while
+ 'cholesky' is the fastest but least robust. The method 'eigh' uses
+ eigendecomposition to compute A and is faster than 'svd' but slower
+ than 'cholesky'.
+ use_factor : bool, optional
+ If set to True, the ``cov`` argument is treated as a precomputed
+ factor matrix A such that ``np.dot(A, A.T) == cov``, computed with
+ the method specified in the ``method`` argument. This provides
+ significant speedups because the factorization of cov is avoided.
Returns
-------
@@ -3421,10 +3434,25 @@ cdef class Generator:
--------
>>> mean = (1, 2)
>>> cov = [[1, 0], [0, 1]]
- >>> x = np.random.default_rng().multivariate_normal(mean, cov, (3, 3))
+ >>> rng = np.random.default_rng()
+ >>> x = rng.multivariate_normal(mean, cov, (3, 3))
>>> x.shape
(3, 3, 2)
+
+ We can use a factorization method other than the default:
+
+ >>> y = rng.multivariate_normal(mean, cov, (3, 3), method='cholesky')
+ >>> y.shape
+ (3, 3, 2)
+
+ We can also pass a precomputed factor matrix of cov to speed up the
+ computation. This is useful when one needs to generate random values
+ using the same covariance matrix but different means.
+ >>> s, u = np.linalg.eigh(cov)
+ >>> A = u * np.sqrt(s) # factor matrix of cov using Eigendecomposition.
+ >>> z = rng.multivariate_normal(mean, A, (3, 3), method='eigh', use_factor=True)
+ >>> z.shape
+ (3, 3, 2)
+
The following is probably true, given that 0.6 is roughly twice the
standard deviation:
@@ -3432,7 +3460,9 @@ cdef class Generator:
[True, True] # random
"""
- from numpy.dual import svd
+ if method not in {'eigh', 'svd', 'cholesky'}:
+ raise ValueError(
+ "method must be one of {'eigh', 'svd', 'cholesky'}")
# Check preconditions on arguments
mean = np.array(mean)
@@ -3446,10 +3476,13 @@ cdef class Generator:
if len(mean.shape) != 1:
raise ValueError("mean must be 1 dimensional")
+ matrix_type = "factor" if use_factor else "cov"
if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
- raise ValueError("cov must be 2 dimensional and square")
+ raise ValueError("{} must be 2 dimensional and square"
+ .format(matrix_type))
if mean.shape[0] != cov.shape[0]:
- raise ValueError("mean and cov must have same length")
+ raise ValueError("mean and {} must have same length"
+ .format(matrix_type))
# Compute shape of output and create a matrix of independent
# standard normally distributed random numbers. The matrix has rows
@@ -3459,6 +3492,16 @@ cdef class Generator:
final_shape.append(mean.shape[0])
x = self.standard_normal(final_shape).reshape(-1, mean.shape[0])
+ if use_factor:
+ # ``cov`` holds a precomputed factor A of the covariance matrix with
+ # np.dot(A, A.T) == cov (e.g. the lower triangular output of
+ # np.linalg.cholesky), so the standard normals are transformed by A.T.
+ # An upper triangular Cholesky factor U instead satisfies
+ # np.dot(U.T, U) == cov and is applied directly.
+ if method == 'cholesky' and not np.any(np.tril(cov, k=-1)):
+ x = mean + np.dot(x, cov)
+ else:
+ x = mean + np.dot(x, cov.T)
+ x.shape = tuple(final_shape)
+ return x
+
# Transform matrix of standard normals into matrix where each row
# contains multivariate normals with the desired covariance.
# Compute A such that dot(transpose(A),A) == cov.
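As a sanity check on the factor conventions used above (a standalone sketch in plain NumPy, independent of this patch): the factor a user precomputes satisfies np.dot(A, A.T) == cov and is applied transposed, which is the same as multiplying the standard normals by a matrix B with np.dot(B.T, B) == cov:

import numpy as np

cov = np.array([[2.0, 1.0], [1.0, 2.0]])

# Three factors A with A @ A.T == cov (up to rounding).
L = np.linalg.cholesky(cov)            # lower triangular
s, u = np.linalg.eigh(cov)
A_eigh = u * np.sqrt(s)
u2, s2, vh = np.linalg.svd(cov)
A_svd = u2 * np.sqrt(s2)

for A in (L, A_eigh, A_svd):
    assert np.allclose(A @ A.T, cov)

# Rows of standard normals transformed by A.T have covariance close to cov.
rng = np.random.default_rng(0)
x = rng.standard_normal((200_000, 2))
print(np.cov(x @ L.T, rowvar=False))   # roughly [[2, 1], [1, 2]]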
@@ -3475,13 +3518,24 @@ cdef class Generator:
# GH10839, ensure double to make tol meaningful
cov = cov.astype(np.double)
- (u, s, v) = svd(cov)
+ if method == 'svd':
+ from numpy.dual import svd
+ (u, s, vh) = svd(cov)
+ elif method == 'eigh':
+ from numpy.dual import eigh
+ (s, u) = eigh(cov)
+ else:
+ from numpy.dual import cholesky
+ l = cholesky(cov)
- if check_valid != 'ignore':
+ if check_valid != 'ignore' and method != 'cholesky':
if check_valid != 'warn' and check_valid != 'raise':
- raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'")
-
- psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
+ raise ValueError(
+ "check_valid must equal 'warn', 'raise', or 'ignore'")
+ if method == 'svd':
+ psd = np.allclose(np.dot(vh.T * s, vh), cov, rtol=tol, atol=tol)
+ else:
+ psd = not np.any(s < -tol)
if not psd:
if check_valid == 'warn':
warnings.warn("covariance is not positive-semidefinite.",
@@ -3489,7 +3543,10 @@ cdef class Generator:
else:
raise ValueError("covariance is not positive-semidefinite.")
- x = np.dot(x, np.sqrt(s)[:, None] * v)
+ if method == 'cholesky':
+ x = np.dot(x, l.T)
+ elif method == 'eigh':
+ x = np.dot(x, (u * np.sqrt(s)).T)
+ else:
+ x = np.dot(x, np.sqrt(s)[:, None] * vh)
x += mean
x.shape = tuple(final_shape)
return x
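The speedup that ``use_factor`` targets is drawing repeatedly from the same covariance with different means; a rough sketch of that pattern, again assuming a build that includes this patch:

import numpy as np

rng = np.random.default_rng()
cov = np.array([[2.0, 1.0], [1.0, 2.0]])

# Factor the covariance once ...
s, u = np.linalg.eigh(cov)
factor = u * np.sqrt(s)                # factor @ factor.T == cov

# ... then reuse it for many different mean vectors.
means = rng.uniform(-5.0, 5.0, size=(100, 2))
draws = [rng.multivariate_normal(m, factor, size=10,
                                 method='eigh', use_factor=True)
         for m in means]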
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index a962fe84e..2677b04fa 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -3,6 +3,8 @@ import sys
import pytest
import numpy as np
+from numpy.dual import cholesky, eigh, svd
+from numpy.linalg import LinAlgError
from numpy.testing import (
assert_, assert_raises, assert_equal,
assert_warns, assert_no_warnings, assert_array_equal,
@@ -982,7 +984,18 @@ class TestRandomDist(object):
mean = (.123456789, 10)
cov = [[1, 0], [0, 1]]
size = (3, 2)
- actual = random.multivariate_normal(mean, cov, size)
+ actual_svd = random.multivariate_normal(mean, cov, size)
+ # the seed needs to be reset for each method
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, cov, size,
+ method='eigh')
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, cov, size,
+ method='cholesky')
+ # with an identity covariance the factor matrix equals cov for every
+ # method, so cov can be passed directly with use_factor=True
+ random = Generator(MT19937(self.seed))
+ actual_factor = random.multivariate_normal(mean, cov, size,
+ use_factor=True)
desired = np.array([[[-1.747478062846581, 11.25613495182354 ],
[-0.9967333370066214, 10.342002097029821 ]],
[[ 0.7850019631242964, 11.181113712443013 ],
@@ -990,18 +1003,76 @@ class TestRandomDist(object):
[[ 0.7130260107430003, 9.551628690083056 ],
[ 0.7127098726541128, 11.991709234143173 ]]])
- assert_array_almost_equal(actual, desired, decimal=15)
+ assert_array_almost_equal(actual_svd, desired, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired, decimal=15)
+ assert_array_almost_equal(actual_chol, desired, decimal=15)
+ assert_array_almost_equal(actual_factor, desired, decimal=15)
# Check for default size, was raising deprecation warning
- actual = random.multivariate_normal(mean, cov)
- desired = np.array([0.233278563284287, 9.424140804347195])
- assert_array_almost_equal(actual, desired, decimal=15)
+ random = Generator(MT19937(self.seed))
+ actual_svd = random.multivariate_normal(mean, cov)
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, cov, method='eigh')
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, cov, method='cholesky')
+ # with an identity covariance the factor matrix equals cov for every method
+ random = Generator(MT19937(self.seed))
+ actual_factor = random.multivariate_normal(mean, cov, use_factor=True)
+ desired = np.array([-1.747478062846581, 11.25613495182354])
+ assert_array_almost_equal(actual_svd, desired, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired, decimal=15)
+ assert_array_almost_equal(actual_chol, desired, decimal=15)
+ assert_array_almost_equal(actual_factor, desired, decimal=15)
+
+ # check that a ValueError is raised when a non-square factor is passed
+ non_square_factor = [[1, 0], [0, 1], [1, 0]]
+ assert_raises(ValueError, random.multivariate_normal, mean,
+ non_square_factor, use_factor=True)
+
+ # Check that a non-symmetric covariance input raises a ValueError
+ # when check_valid='raise' and the default svd method is used.
+ mean = [0, 0]
+ cov = [[1, 2], [1, 2]]
+ assert_raises(ValueError, random.multivariate_normal, mean, cov,
+ check_valid='raise')
+
+ # Check that each method gives the same output when a matching
+ # precomputed factor is passed with use_factor=True
+ cov = [[2, 1], [1, 2]]
+ random = Generator(MT19937(self.seed))
+ desired_svd = random.multivariate_normal(mean, cov)
+ u, s, vh = svd(cov)
+ svd_factor = np.sqrt(s)[:, None] * vh
+ random = Generator(MT19937(self.seed))
+ actual_svd = random.multivariate_normal(mean, svd_factor.T,
+ use_factor=True)
+ random = Generator(MT19937(self.seed))
+ desired_eigh = random.multivariate_normal(mean, cov, method='eigh')
+ s, u = eigh(cov)
+ eigh_factor = u * np.sqrt(s)
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, eigh_factor,
+ method='eigh',
+ use_factor=True)
+ random = Generator(MT19937(self.seed))
+ desired_chol = random.multivariate_normal(mean, cov, method='cholesky')
+ chol_factor = cholesky(cov)
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, chol_factor,
+ method='cholesky',
+ use_factor=True)
+ assert_array_almost_equal(actual_svd, desired_svd, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired_eigh, decimal=15)
+ assert_array_almost_equal(actual_chol, desired_chol, decimal=15)
# Check that non positive-semidefinite covariance warns with
# RuntimeWarning
- mean = [0, 0]
cov = [[1, 2], [2, 1]]
assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov)
+ assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov,
+ method='eigh')
+ assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+ method='cholesky')
# and that it doesn't warn with RuntimeWarning check_valid='ignore'
assert_no_warnings(random.multivariate_normal, mean, cov,
@@ -1010,10 +1081,14 @@ class TestRandomDist(object):
# and that it raises a ValueError when check_valid='raise'
assert_raises(ValueError, random.multivariate_normal, mean, cov,
check_valid='raise')
+ assert_raises(ValueError, random.multivariate_normal, mean, cov,
+ check_valid='raise', method='eigh')
cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
with suppress_warnings() as sup:
random.multivariate_normal(mean, cov)
+ random.multivariate_normal(mean, cov, method='eigh')
+ random.multivariate_normal(mean, cov, method='cholesky')
w = sup.record(RuntimeWarning)
assert len(w) == 0
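One check the tests above do not include is a statistical comparison against a non-identity covariance; a rough sketch of such a test (not part of this patch, with loosely chosen tolerances), runnable only against a build that includes it:

import numpy as np
from numpy.random import Generator, MT19937


def check_sample_covariance(method):
    # Draw many samples and compare the empirical mean and covariance
    # with the requested ones.
    rng = Generator(MT19937(1234))
    mean = [1.0, -2.0]
    cov = np.array([[2.0, 1.0], [1.0, 2.0]])
    x = rng.multivariate_normal(mean, cov, size=200_000, method=method)
    assert np.allclose(np.mean(x, axis=0), mean, atol=2e-2)
    assert np.allclose(np.cov(x, rowvar=False), cov, atol=5e-2)


for m in ('svd', 'eigh', 'cholesky'):
    check_sample_covariance(m)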