author     RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
committer  RedRuM <44142765+zoj613@users.noreply.github.com>  2019-08-17 23:45:26 +0200
commit     ad56f16191adb1b45576d6c3214c19dffdd0eb34 (patch)
tree       1ed053a5f23d23768b84ee5953162fe18ff6b8b4
parent     e6f9a354ff3e2354d343493f6463328895f702c2 (diff)
download   numpy-ad56f16191adb1b45576d6c3214c19dffdd0eb34.tar.gz
ENH: re-write the implementation with new keywords method and use_factor
-rw-r--r--  changelog/14197.improvement.rst                 8
-rw-r--r--  numpy/random/generator.pyx                     79
-rw-r--r--  numpy/random/tests/test_generator_mt19937.py   87
3 files changed, 157 insertions, 17 deletions
diff --git a/changelog/14197.improvement.rst b/changelog/14197.improvement.rst
new file mode 100644
index 000000000..b822d1645
--- /dev/null
+++ b/changelog/14197.improvement.rst
@@ -0,0 +1,8 @@
+``method`` and ``use_factor`` options for `np.random.multivariate_normal`
+----------------------------------------------------------------------------
+A ``method`` option is now available for `np.random.multivariate_normal` with
+possible values {'svd', 'eigh', 'cholesky'}. To use it, write
+``np.random.multivariate_normal(..., method=<method>)``. A second option,
+``use_factor``, allows passing a precomputed factor ``A`` of the covariance
+matrix (with ``A @ A.T == cov``) to speed up computation for any given
+``method``. To use it, write ``np.random.multivariate_normal(..., use_factor=True)``.
\ No newline at end of file
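For orientation, a minimal usage sketch of the two new keywords, assuming a NumPy build that carries this patch (``use_factor`` exists only in this change, not in a released NumPy):

import numpy as np

rng = np.random.default_rng()
mean = [0.0, 0.0]
cov = [[2.0, 1.0], [1.0, 2.0]]

# Select the factorization method explicitly; 'cholesky' is the fastest.
samples = rng.multivariate_normal(mean, cov, size=1000, method='cholesky')

# Reuse a precomputed factor A with np.dot(A, A.T) == cov so the
# factorization is not repeated on every call.
A = np.linalg.cholesky(cov)
more = rng.multivariate_normal(mean, A, size=1000, method='cholesky',
                               use_factor=True)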
diff --git a/numpy/random/generator.pyx b/numpy/random/generator.pyx
index c7432d8c1..f6f0aba6a 100644
--- a/numpy/random/generator.pyx
+++ b/numpy/random/generator.pyx
@@ -3331,7 +3331,7 @@ cdef class Generator:
# Multivariate distributions:
def multivariate_normal(self, mean, cov, size=None, check_valid='warn',
- tol=1e-8):
+ tol=1e-8, method='svd', use_factor=False):
"""
multivariate_normal(mean, cov, size=None, check_valid='warn', tol=1e-8)
@@ -3361,6 +3361,19 @@ cdef class Generator:
tol : float, optional
Tolerance when checking the singular values in covariance matrix.
cov is cast to double before the check.
+ method : {'svd', 'eigh', 'cholesky'}, optional
+ The cov input is used to compute a factor matrix A such that
+ ``A @ A.T = cov``. This argument selects the method used to compute
+ the factor matrix A. The default method 'svd' is the slowest, while
+ 'cholesky' is the fastest but least robust. The method 'eigh' uses
+ eigendecomposition to compute A and is faster than 'svd' but slower
+ than 'cholesky'.
+ use_factor : bool, optional
+ If set to True, the ``cov`` argument is treated as a precomputed
+ factor matrix A such that ``np.dot(A, A.T) == cov``, computed with
+ the method specified in the ``method`` argument. This provides
+ significant speedups because the factorization of cov is avoided.
Returns
-------
@@ -3421,10 +3434,25 @@ cdef class Generator:
--------
>>> mean = (1, 2)
>>> cov = [[1, 0], [0, 1]]
- >>> x = np.random.default_rng().multivariate_normal(mean, cov, (3, 3))
+ >>> rng = np.random.default_rng()
+ >>> x = rng.multivariate_normal(mean, cov, (3, 3))
>>> x.shape
(3, 3, 2)
+
+ We can use a factorization method other than the default:
+
+ >>> y = rng.multivariate_normal(mean, cov, (3, 3), method='cholesky')
+ >>> y.shape
+ (3, 3, 2)
+
+ We can also pass a precomputed factor matrix of cov to speed up the
+ computation. This is useful when one needs to generate random values
+ using the same covariance matrix but different means.
+ >>> s, u = np.linalg.eigh(cov)
+ >>> A = u * np.sqrt(s) # factor matrix of cov using Eigendecomposition.
+ >>> z = rng.multivariate_normal(mean, A, (3, 3), method='eigh', use_factor=True)
+ >>> z.shape
+ (3, 3, 2)
+
The following is probably true, given that 0.6 is roughly twice the
standard deviation:
@@ -3432,7 +3460,9 @@ cdef class Generator:
[True, True] # random
"""
- from numpy.dual import svd
+ if method not in {'eigh', 'svd', 'cholesky'}:
+ raise ValueError(
+ "method must be one of {'eigh', 'svd', 'cholesky'}")
# Check preconditions on arguments
mean = np.array(mean)
@@ -3446,10 +3476,13 @@ cdef class Generator:
if len(mean.shape) != 1:
raise ValueError("mean must be 1 dimensional")
+ matrix_type = "factor" if use_factor else "cov"
if (len(cov.shape) != 2) or (cov.shape[0] != cov.shape[1]):
- raise ValueError("cov must be 2 dimensional and square")
+ raise ValueError("{} must be 2 dimensional and square"
+ .format(matrix_type))
if mean.shape[0] != cov.shape[0]:
- raise ValueError("mean and cov must have same length")
+ raise ValueError("mean and {} must have same length"
+ .format(matrix_type))
# Compute shape of output and create a matrix of independent
# standard normally distributed random numbers. The matrix has rows
@@ -3459,6 +3492,16 @@ cdef class Generator:
final_shape.append(mean.shape[0])
x = self.standard_normal(final_shape).reshape(-1, mean.shape[0])
+ if use_factor:
+ # ``cov`` holds a precomputed factor A of the covariance matrix with
+ # np.dot(A, A.T) == cov (e.g. the lower triangular output of
+ # np.linalg.cholesky), so the standard normals are transformed by A.T.
+ # An upper triangular Cholesky factor U instead satisfies
+ # np.dot(U.T, U) == cov and is applied directly.
+ if method == 'cholesky' and not np.any(np.tril(cov, k=-1)):
+ x = mean + np.dot(x, cov)
+ else:
+ x = mean + np.dot(x, cov.T)
+ x.shape = tuple(final_shape)
+ return x
+
# Transform matrix of standard normals into matrix where each row
# contains multivariate normals with the desired covariance.
# Compute A such that dot(transpose(A),A) == cov.
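As a sanity check on the factor conventions used above (a standalone sketch in plain NumPy, independent of this patch): the factor a user precomputes satisfies np.dot(A, A.T) == cov and is applied transposed, which is the same as multiplying the standard normals by a matrix B with np.dot(B.T, B) == cov:

import numpy as np

cov = np.array([[2.0, 1.0], [1.0, 2.0]])

# Three factors A with A @ A.T == cov (up to rounding).
L = np.linalg.cholesky(cov)            # lower triangular
s, u = np.linalg.eigh(cov)
A_eigh = u * np.sqrt(s)
u2, s2, vh = np.linalg.svd(cov)
A_svd = u2 * np.sqrt(s2)

for A in (L, A_eigh, A_svd):
    assert np.allclose(A @ A.T, cov)

# Rows of standard normals transformed by A.T have covariance close to cov.
rng = np.random.default_rng(0)
x = rng.standard_normal((200_000, 2))
print(np.cov(x @ L.T, rowvar=False))   # roughly [[2, 1], [1, 2]]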
@@ -3475,13 +3518,24 @@ cdef class Generator:
# GH10839, ensure double to make tol meaningful
cov = cov.astype(np.double)
- (u, s, v) = svd(cov)
+ if method == 'svd':
+ from numpy.dual import svd
+ (u, s, vh) = svd(cov)
+ elif method == 'eigh':
+ from numpy.dual import eigh
+ (s, u) = eigh(cov)
+ else:
+ from numpy.dual import cholesky
+ l = cholesky(cov)
- if check_valid != 'ignore':
+ if check_valid != 'ignore' and method != 'cholesky':
if check_valid != 'warn' and check_valid != 'raise':
- raise ValueError("check_valid must equal 'warn', 'raise', or 'ignore'")
-
- psd = np.allclose(np.dot(v.T * s, v), cov, rtol=tol, atol=tol)
+ raise ValueError(
+ "check_valid must equal 'warn', 'raise', or 'ignore'")
+ if method == 'svd':
+ psd = np.allclose(np.dot(vh.T * s, vh), cov, rtol=tol, atol=tol)
+ else:
+ psd = not np.any(s < -tol)
if not psd:
if check_valid == 'warn':
warnings.warn("covariance is not positive-semidefinite.",
@@ -3489,7 +3543,10 @@ cdef class Generator:
else:
raise ValueError("covariance is not positive-semidefinite.")
- x = np.dot(x, np.sqrt(s)[:, None] * v)
+ if method == 'cholesky':
+ x = np.dot(x, l.T)
+ elif method == 'eigh':
+ x = np.dot(x, (u * np.sqrt(s)).T)
+ else:
+ x = np.dot(x, np.sqrt(s)[:, None] * vh)
x += mean
x.shape = tuple(final_shape)
return x
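The speedup that ``use_factor`` targets is drawing repeatedly from the same covariance with different means; a rough sketch of that pattern, again assuming a build that includes this patch:

import numpy as np

rng = np.random.default_rng()
cov = np.array([[2.0, 1.0], [1.0, 2.0]])

# Factor the covariance once ...
s, u = np.linalg.eigh(cov)
factor = u * np.sqrt(s)                # factor @ factor.T == cov

# ... then reuse it for many different mean vectors.
means = rng.uniform(-5.0, 5.0, size=(100, 2))
draws = [rng.multivariate_normal(m, factor, size=10,
                                 method='eigh', use_factor=True)
         for m in means]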
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index a962fe84e..2677b04fa 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -3,6 +3,8 @@ import sys
import pytest
import numpy as np
+from numpy.dual import cholesky, eigh, svd
+from numpy.linalg import LinAlgError
from numpy.testing import (
assert_, assert_raises, assert_equal,
assert_warns, assert_no_warnings, assert_array_equal,
@@ -982,7 +984,18 @@ class TestRandomDist(object):
mean = (.123456789, 10)
cov = [[1, 0], [0, 1]]
size = (3, 2)
- actual = random.multivariate_normal(mean, cov, size)
+ actual_svd = random.multivariate_normal(mean, cov, size)
+ # the seed needs to be reset for each method
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, cov, size,
+ method='eigh')
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, cov, size,
+ method='cholesky')
+ # with an identity covariance the factor matrix equals cov for every
+ # method, so cov can be passed directly with use_factor=True
+ random = Generator(MT19937(self.seed))
+ actual_factor = random.multivariate_normal(mean, cov, size,
+ use_factor=True)
desired = np.array([[[-1.747478062846581, 11.25613495182354 ],
[-0.9967333370066214, 10.342002097029821 ]],
[[ 0.7850019631242964, 11.181113712443013 ],
@@ -990,18 +1003,76 @@ class TestRandomDist(object):
[[ 0.7130260107430003, 9.551628690083056 ],
[ 0.7127098726541128, 11.991709234143173 ]]])
- assert_array_almost_equal(actual, desired, decimal=15)
+ assert_array_almost_equal(actual_svd, desired, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired, decimal=15)
+ assert_array_almost_equal(actual_chol, desired, decimal=15)
+ assert_array_almost_equal(actual_factor, desired, decimal=15)
# Check for default size, was raising deprecation warning
- actual = random.multivariate_normal(mean, cov)
- desired = np.array([0.233278563284287, 9.424140804347195])
- assert_array_almost_equal(actual, desired, decimal=15)
+ random = Generator(MT19937(self.seed))
+ actual_svd = random.multivariate_normal(mean, cov)
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, cov, method='eigh')
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, cov, method='cholesky')
+ # with an identity covariance the factor matrix equals cov for every method
+ random = Generator(MT19937(self.seed))
+ actual_factor = random.multivariate_normal(mean, cov, use_factor=True)
+ desired = np.array([-1.747478062846581, 11.25613495182354])
+ assert_array_almost_equal(actual_svd, desired, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired, decimal=15)
+ assert_array_almost_equal(actual_chol, desired, decimal=15)
+ assert_array_almost_equal(actual_factor, desired, decimal=15)
+
+ # check that a ValueError is raised when a non-square factor is passed
+ non_square_factor = [[1, 0], [0, 1], [1, 0]]
+ assert_raises(ValueError, random.multivariate_normal, mean,
+ non_square_factor, use_factor=True)
+
+ # Check that a non-symmetric covariance input raises a ValueError
+ # when check_valid='raise' and the default svd method is used.
+ mean = [0, 0]
+ cov = [[1, 2], [1, 2]]
+ assert_raises(ValueError, random.multivariate_normal, mean, cov,
+ check_valid='raise')
+
+ # Check that each method gives the same output when a matching
+ # precomputed factor is passed with use_factor=True
+ cov = [[2, 1], [1, 2]]
+ random = Generator(MT19937(self.seed))
+ desired_svd = random.multivariate_normal(mean, cov)
+ u, s, vh = svd(cov)
+ svd_factor = np.sqrt(s)[:, None] * vh
+ random = Generator(MT19937(self.seed))
+ actual_svd = random.multivariate_normal(mean, svd_factor.T,
+ use_factor=True)
+ random = Generator(MT19937(self.seed))
+ desired_eigh = random.multivariate_normal(mean, cov, method='eigh')
+ s, u = eigh(cov)
+ eigh_factor = u * np.sqrt(s)
+ random = Generator(MT19937(self.seed))
+ actual_eigh = random.multivariate_normal(mean, eigh_factor,
+ method='eigh',
+ use_factor=True)
+ random = Generator(MT19937(self.seed))
+ desired_chol = random.multivariate_normal(mean, cov, method='cholesky')
+ chol_factor = cholesky(cov)
+ random = Generator(MT19937(self.seed))
+ actual_chol = random.multivariate_normal(mean, chol_factor,
+ method='cholesky',
+ use_factor=True)
+ assert_array_almost_equal(actual_svd, desired_svd, decimal=15)
+ assert_array_almost_equal(actual_eigh, desired_eigh, decimal=15)
+ assert_array_almost_equal(actual_chol, desired_chol, decimal=15)
# Check that non positive-semidefinite covariance warns with
# RuntimeWarning
- mean = [0, 0]
cov = [[1, 2], [2, 1]]
assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov)
+ assert_warns(RuntimeWarning, random.multivariate_normal, mean, cov,
+ method='eigh')
+ assert_raises(LinAlgError, random.multivariate_normal, mean, cov,
+ method='cholesky')
# and that it doesn't warn with RuntimeWarning check_valid='ignore'
assert_no_warnings(random.multivariate_normal, mean, cov,
@@ -1010,10 +1081,14 @@ class TestRandomDist(object):
# and that it raises a ValueError when check_valid='raise'
assert_raises(ValueError, random.multivariate_normal, mean, cov,
check_valid='raise')
+ assert_raises(ValueError, random.multivariate_normal, mean, cov,
+ check_valid='raise', method='eigh')
cov = np.array([[1, 0.1], [0.1, 1]], dtype=np.float32)
with suppress_warnings() as sup:
random.multivariate_normal(mean, cov)
+ random.multivariate_normal(mean, cov, method='eigh')
+ random.multivariate_normal(mean, cov, method='cholesky')
w = sup.record(RuntimeWarning)
assert len(w) == 0
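One check the tests above do not include is a statistical comparison against a non-identity covariance; a rough sketch of such a test (not part of this patch, with loosely chosen tolerances), runnable only against a build that includes it:

import numpy as np
from numpy.random import Generator, MT19937


def check_sample_covariance(method):
    # Draw many samples and compare the empirical mean and covariance
    # with the requested ones.
    rng = Generator(MT19937(1234))
    mean = [1.0, -2.0]
    cov = np.array([[2.0, 1.0], [1.0, 2.0]])
    x = rng.multivariate_normal(mean, cov, size=200_000, method=method)
    assert np.allclose(np.mean(x, axis=0), mean, atol=2e-2)
    assert np.allclose(np.cov(x, rowvar=False), cov, atol=5e-2)


for m in ('svd', 'eigh', 'cholesky'):
    check_sample_covariance(m)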