author     RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
committer  RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
commit     ad56f16191adb1b45576d6c3214c19dffdd0eb34 (patch)
tree       1ed053a5f23d23768b84ee5953162fe18ff6b8b4
parent     e6f9a354ff3e2354d343493f6463328895f702c2 (diff)
ENH: re-write the implementation with new keywords method and use_factor
 changelog/14197.improvement.rst              |  8
 numpy/random/generator.pyx                   | 79
 numpy/random/tests/test_generator_mt19937.py | 87
 3 files changed, 157 insertions, 17 deletions
diff --git a/changelog/14197.improvement.rst b/changelog/14197.improvement.rst
new file mode 100644
index 000000000..b822d1645
--- /dev/null
+++ b/changelog/14197.improvement.rst
@@ -0,0 +1,8 @@
+``method`` and ``use_factor`` options for `np.random.multivariate_normal`
+--------------------------------------------------------------------------
+A ``method`` option is now available for `np.random.multivariate_normal` with
+possible values {'svd', 'eigh', 'cholesky'}. To use it, write
+``np.random.multivariate_normal(..., method=<method>)``. Another option,
+``use_factor``, is now available which allows the user to pass a precomputed
+factor of a covariance matrix in order to speed up computation for any given
+``method``. To use it, write ``np.random.multivariate_normal(..., use_factor=True)``.
\ No newline at end of file
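For readers skimming the patch, the following is a minimal usage sketch of the two proposed keywords. It assumes a NumPy build with this patch applied; ``method`` and ``use_factor`` are the keywords introduced here and are not part of a released NumPy API::

    import numpy as np

    # Assumes this patch is applied; `method` and `use_factor` are the new
    # keywords proposed in this commit, not part of released NumPy.
    rng = np.random.default_rng(12345)
    mean = [0.0, 0.0]
    cov = [[2.0, 1.0], [1.0, 2.0]]

    # Select the factorization used internally (the default is 'svd').
    x = rng.multivariate_normal(mean, cov, size=1000, method='eigh')

    # Reuse a precomputed factor of cov to skip the factorization step,
    # mirroring the example in the updated docstring.
    s, u = np.linalg.eigh(cov)
    A = u * np.sqrt(s)              # factor with A @ A.T == cov
    y = rng.multivariate_normal(mean, A, size=1000, method='eigh',
                                use_factor=True)
    print(x.shape, y.shape)         # (1000, 2) (1000, 2)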
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index c7432d8c1..f6f0aba6a 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -3331,7 +3331,7 @@ cdef class Generator:
 
     # Multivariate distributions:
     def multivariate_normal(self, mean, cov, size=None, check_valid='warn',
-                            tol=1e-8):
+                            tol=1e-8, method='svd', use_factor=False):
         """
         multivariate_normal(mean, cov, size=None, check_valid='warn',
                             tol=1e-8)
@@ -3361,6 +3361,19 @@ cdef class Generator:
         tol : float, optional
             Tolerance when checking the singular values in covariance matrix.
             cov is cast to double before the check.
+        method : {'svd', 'eigh', 'cholesky'}, optional
+            The cov input is used to compute a factor matrix A such that
+            transpose(A) * A = cov. This argument is used to select the method
+            used to compute the factor matrix A. The default method 'svd' is
+            the slowest, while 'cholesky' is the fastest but less robust than
+            the slowest method. The method 'eigh' uses eigendecomposition to
+            compute A and is faster than 'svd' but slower than 'cholesky'.
+        use_factor : bool, optional
+            If set to True then the cov argument is treated as a precomputed
+            factor matrix A such that np.dot(A, transpose(A)) == cov is True,
+            with the assumption that A was computed with the method specified
+            in the ``method`` argument. This provides significant speedups
+            because the factorization of cov is avoided.
 
         Returns
         -------
@@ -3421,10 +3434,25 @@ cdef class Generator:
         --------
         >>> mean = (1, 2)
         >>> cov = [[1, 0], [0, 1]]
-        >>> x = np.random.default_rng().multivariate_normal(mean, cov, (3, 3))
+        >>> rng = np.random.default_rng()
+        >>> x = rng.multivariate_normal(mean, cov, (3, 3))
         >>> x.shape
         (3, 3, 2)
 
+        We can use a method other than the default to factorize cov:
+        >>> y = rng.multivariate_normal(mean, cov, (3, 3), method='cholesky')
+        >>> y.shape
+        (3, 3, 2)
+
+        We can also use a precomputed factor matrix of cov to speed up the
+        computation. This is very useful when one needs to generate random
+        values using the same covariance matrix but different means.
+        >>> s, u = np.linalg.eigh(cov)
+        >>> A = u * np.sqrt(s)  # factor matrix of cov via eigendecomposition
+        >>> z = rng.multivariate_normal(mean, A, (3, 3), method='eigh', use_factor=True)
+        >>> z.shape
+        (3, 3, 2)
+
         The following is probably true, given that 0.6 is roughly twice the
         standard deviation:
 
@@ -3432,7 +3460,9 @@ cdef class Generator:
         [True, True] # random
 
         """
-        from numpy.dual import svd
+        if method not in {'eigh', 'svd', 'cholesky'}:
+            raise ValueError(
+                "method must be one of {'eigh', 'svd', 'cholesky'}")
 
         # Check preconditions on arguments
         mean = np.array(mean)
@@ -3446,10 +3476,13 @@ cdef class Generator:
 
         if len(mean.shape) != 1:
             raise ValueError("mean must be 1 dimensional")
+        matrix_type = "factor" if use_factor else "cov"
         if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
-            raise ValueError("cov must be 2 dimensional and square")
+            raise ValueError("{} must be 2 dimensional and square"
+                             .format(matrix_type))
         if mean.shape[0] != cov.shape[0]:
-            raise ValueError("mean and cov must have same length")
+            raise ValueError("mean and {} must have same length"
+                             .format(matrix_type))
 
         # Compute shape of output and create a matrix of independent
         # standard normally distributed random numbers. The matrix has rows
@@ -3459,6 +3492,16 @@ cdef class Generator:
         final_shape.append(mean.shape[0])
         x = self.standard_normal(final_shape).reshape(-1, mean.shape[0])
 
+        if use_factor:
+            # check if the factor supplied is a cholesky upper triangular
+            # matrix and calculate random values accordingly.
+            if method == 'cholesky' and not np.any(np.tril(cov, k=-1)):
+                x = mean + np.dot(x, cov.T)
+            else:
+                x = mean + np.dot(x, cov)
+            x.shape = tuple(final_shape)
+            return x
+
         # Transform matrix of standard normals into matrix where each row
         # contains multivariate normals with the desired covariance.
         # Compute A such that dot(transpose(A),A) == cov.
@@ -3475,13 +3518,24 @@ cdef class Generator:
         # GH10839, ensure double to make tol meaningful
         cov = cov.astype(np.double)
-        (u, s, v) = svd(cov)
+        if method == 'svd':
+            from numpy.dual import svd
+            (u, s, vh) = svd(cov)
+        elif method == 'eigh':
+            from numpy.dual import eigh
+            (s, u) = eigh(cov)
+        else:
+            from numpy.dual import cholesky
+            l = cholesky(cov)
 
-        if check_valid != 'ignore':
+        if check_valid != 'ignore' and method != 'cholesky':
             if check_valid != 'warn' and check_valid != 'raise':
-                raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'")
-
-            psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
+                raise ValueError(
+                    "check_valid must equal 'warn', 'raise', or 'ignore'")
+            if method == 'svd':
+                psd = np.allclose(np.dot(vh.T * s, vh), cov, rtol=tol, atol=tol)
+            else:
+                psd = not np.any(s < -tol)
             if not psd:
                 if check_valid == 'warn':
                     warnings.warn("covariance is not positive-semidefinite.",
@@ -3489,7 +3543,10 @@ cdef class Generator:
                 else:
                     raise ValueError("covariance is not positive-semidefinite.")
 
-        x = np.dot(x, np.sqrt(s)[:, None] * v)
+        if method == 'cholesky':
+            x = np.dot(x, l)
+        else:
+            x = np.dot(x, u * np.sqrt(s))
         x += mean
         x.shape = tuple(final_shape)
         return x
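As an aside, the three factorizations these new code paths rely on can be checked with plain ``numpy.linalg``. The sketch below is not part of the patch; it only illustrates the factor relationships assumed above::

    import numpy as np

    cov = np.array([[2.0, 1.0], [1.0, 2.0]])

    # SVD: cov == vh.T @ diag(s) @ vh for a symmetric PSD matrix,
    # so A = sqrt(s)[:, None] * vh satisfies A.T @ A == cov.
    u, s, vh = np.linalg.svd(cov)
    A_svd = np.sqrt(s)[:, None] * vh
    assert np.allclose(A_svd.T @ A_svd, cov)

    # Eigendecomposition: cov == u @ diag(s) @ u.T,
    # so A = u * sqrt(s) satisfies A @ A.T == cov.
    s, u = np.linalg.eigh(cov)
    A_eigh = u * np.sqrt(s)
    assert np.allclose(A_eigh @ A_eigh.T, cov)

    # The eigh path in the patch also reuses s for its PSD check
    # (psd = not np.any(s < -tol)).
    assert not np.any(s < -1e-8)

    # Cholesky: L is lower triangular with L @ L.T == cov.
    L = np.linalg.cholesky(cov)
    assert np.allclose(L @ L.T, cov)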
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index a962fe84e..2677b04fa 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -3,6 +3,8 @@ import sys
 import pytest
 
 import numpy as np
+from numpy.dual import cholesky, eigh, svd
+from numpy.linalg import LinAlgError
 from numpy.testing import (
     assert_, assert_raises, assert_equal, assert_warns,
     assert_no_warnings, assert_array_equal,
@@ -982,7 +984,18 @@ class TestRandomDist(object):
         mean = (.123456789, 10)
         cov = [[1, 0], [0, 1]]
         size = (3, 2)
-        actual = random.multivariate_normal(mean, cov, size)
+        actual_svd = random.multivariate_normal(mean, cov, size)
+        # the seed needs to be reset for each method
+        random = Generator(MT19937(self.seed))
+        actual_eigh = random.multivariate_normal(mean, cov, size,
+                                                 method='eigh')
+        random = Generator(MT19937(self.seed))
+        actual_chol = random.multivariate_normal(mean, cov, size,
+                                                 method='cholesky')
+        # the factor matrix is the same for all methods
+        random = Generator(MT19937(self.seed))
+        actual_factor = random.multivariate_normal(mean, cov, size,
+                                                   use_factor=True)
         desired = np.array([[[-1.747478062846581,  11.25613495182354  ],
                              [-0.9967333370066214, 10.342002097029821 ]],
                             [[ 0.7850019631242964, 11.181113712443013 ],
@@ -990,18 +1003,76 @@ class TestRandomDist(object):
                             [[ 0.7130260107430003,  9.551628690083056 ],
                              [ 0.7127098726541128, 11.991709234143173 ]]])
 
-        assert_array_almost_equal(actual, desired, decimal=15)
+        assert_array_almost_equal(actual_svd, desired, decimal=15)
+        assert_array_almost_equal(actual_eigh, desired, decimal=15)
+        assert_array_almost_equal(actual_chol, desired, decimal=15)
+        assert_array_almost_equal(actual_factor, desired, decimal=15)
 
         # Check for default size, was raising deprecation warning
-        actual = random.multivariate_normal(mean, cov)
-        desired = np.array([0.233278563284287, 9.424140804347195])
-        assert_array_almost_equal(actual, desired, decimal=15)
+        random = Generator(MT19937(self.seed))
+        actual_svd = random.multivariate_normal(mean, cov)
+        random = Generator(MT19937(self.seed))
+        actual_eigh = random.multivariate_normal(mean, cov, method='eigh')
+        random = Generator(MT19937(self.seed))
+        actual_chol = random.multivariate_normal(mean, cov, method='cholesky')
+        # the factor matrix is the same for all methods
+        random = Generator(MT19937(self.seed))
+        actual_factor = random.multivariate_normal(mean, cov, use_factor=True)
+        desired = np.array([-1.747478062846581, 11.25613495182354])
+        assert_array_almost_equal(actual_svd, desired, decimal=15)
+        assert_array_almost_equal(actual_eigh, desired, decimal=15)
+        assert_array_almost_equal(actual_chol, desired, decimal=15)
+        assert_array_almost_equal(actual_factor, desired, decimal=15)
+
+        # test that ValueError is raised when a non-square factor is used
+        non_square_factor = [[1, 0], [0, 1], [1, 0]]
+        assert_raises(ValueError, random.multivariate_normal, mean,
+                      non_square_factor, use_factor=True)
+
+        # Check that non-symmetric covariance input raises an exception when
+        # check_valid='raise' with the default svd method.
+        mean = [0, 0]
+        cov = [[1, 2], [1, 2]]
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise')
+
+        # Check that each method used with use_factor=False returns the same
+        # output as when used with use_factor=True
+        cov = [[2, 1], [1, 2]]
+        random = Generator(MT19937(self.seed))
+        desired_svd = random.multivariate_normal(mean, cov)
+        u, s, vh = svd(cov)
+        svd_factor = np.sqrt(s)[:, None] * vh
+        random = Generator(MT19937(self.seed))
+        actual_svd = random.multivariate_normal(mean, svd_factor.T,
+                                                use_factor=True)
+        random = Generator(MT19937(self.seed))
+        desired_eigh = random.multivariate_normal(mean, cov, method='eigh')
+        s, u = eigh(cov)
+        eigh_factor = u * np.sqrt(s)
+        random = Generator(MT19937(self.seed))
+        actual_eigh = random.multivariate_normal(mean, eigh_factor,
+                                                 method='eigh',
+                                                 use_factor=True)
+        random = Generator(MT19937(self.seed))
+        desired_chol = random.multivariate_normal(mean, cov, method='cholesky')
+        chol_factor = cholesky(cov)
+        random = Generator(MT19937(self.seed))
+        actual_chol = random.multivariate_normal(mean, chol_factor,
+                                                 method='cholesky',
+                                                 use_factor=True)
+        assert_array_almost_equal(actual_svd, desired_svd, decimal=15)
+        assert_array_almost_equal(actual_eigh, desired_eigh, decimal=15)
+        assert_array_almost_equal(actual_chol, desired_chol, decimal=15)
 
         # Check that non positive-semidefinite covariance warns with
         # RuntimeWarning
-        mean = [0, 0]
         cov = [[1, 2], [2, 1]]
         assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov)
+        assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov,
+                     method='eigh')
+        assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+                      method='cholesky')
 
         # and that it doesn't warn with RuntimeWarning check_valid='ignore'
         assert_no_warnings(random.multivariate_normal, mean, cov,
@@ -1010,10 +1081,14 @@ class TestRandomDist(object):
         # and that it raises with RuntimeWarning check_valid='raises'
         assert_raises(ValueError, random.multivariate_normal, mean, cov,
                       check_valid='raise')
+        assert_raises(ValueError, random.multivariate_normal, mean, cov,
+                      check_valid='raise', method='eigh')
 
         cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
         with suppress_warnings() as sup:
             random.multivariate_normal(mean, cov)
+            random.multivariate_normal(mean, cov, method='eigh')
+            random.multivariate_normal(mean, cov, method='cholesky')
             w = sup.record(RuntimeWarning)
             assert len(w) == 0
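A brief note on the test structure above: every method is compared against the same expected output, which only works if each call consumes an identical stream of standard normals. The sketch below is not part of the patch and uses an illustrative seed value; it shows the re-seeding pattern the tests rely on::

    import numpy as np

    seed = 1234  # illustrative value; the test class uses its own self.seed

    # Two Generators built from identically seeded bit generators produce the
    # same standard-normal draws, which is why the tests rebuild
    # Generator(MT19937(self.seed)) before exercising each factorization method.
    rng_a = np.random.Generator(np.random.MT19937(seed))
    rng_b = np.random.Generator(np.random.MT19937(seed))
    assert np.array_equal(rng_a.standard_normal(5), rng_b.standard_normal(5))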