Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/core/code_generators/ufunc_docstrings.py | 11
-rw-r--r-- | numpy/core/memmap.py | 8
-rw-r--r-- | numpy/linalg/linalg.py | 36
-rw-r--r-- | numpy/linalg/tests/test_linalg.py | 35
-rw-r--r-- | numpy/ma/core.py | 6
-rw-r--r-- | numpy/random/_generator.pyx | 85
-rw-r--r-- | numpy/random/_pcg64.pyx | 2
-rw-r--r-- | numpy/random/tests/test_generator_mt19937.py | 25
8 files changed, 178 insertions, 30 deletions
diff --git a/numpy/core/code_generators/ufunc_docstrings.py b/numpy/core/code_generators/ufunc_docstrings.py
index 129516658..82cd6fb27 100644
--- a/numpy/core/code_generators/ufunc_docstrings.py
+++ b/numpy/core/code_generators/ufunc_docstrings.py
@@ -1835,6 +1835,17 @@ add_newdoc('numpy.core.umath', 'left_shift',
     >>> np.left_shift(5, [1,2,3])
     array([10, 20, 40])
 
+    Note that the dtype of the second argument may change the dtype of the
+    result and can lead to unexpected results in some cases (see
+    :ref:`Casting Rules <ufuncs.casting>`):
+
+    >>> a = np.left_shift(np.uint8(255), 1) # Expect 254
+    >>> print(a, type(a)) # Unexpected result due to upcasting
+    510 <class 'numpy.int64'>
+    >>> b = np.left_shift(np.uint8(255), np.uint8(1))
+    >>> print(b, type(b))
+    254 <class 'numpy.uint8'>
+
     """)
 
 add_newdoc('numpy.core.umath', 'less',
diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py
index ad66446c2..cb025736e 100644
--- a/numpy/core/memmap.py
+++ b/numpy/core/memmap.py
@@ -209,10 +209,12 @@ class memmap(ndarray):
         import os.path
         try:
             mode = mode_equivalents[mode]
-        except KeyError:
+        except KeyError as e:
             if mode not in valid_filemodes:
-                raise ValueError("mode must be one of %s" %
-                                 (valid_filemodes + list(mode_equivalents.keys())))
+                raise ValueError(
+                    "mode must be one of {!r} (got {!r})"
+                    .format(valid_filemodes + list(mode_equivalents.keys()), mode)
+                ) from None
 
         if mode == 'w+' and shape is None:
             raise ValueError("shape must be given")
diff --git a/numpy/linalg/linalg.py b/numpy/linalg/linalg.py
index 6d3afdd49..5ee326f3c 100644
--- a/numpy/linalg/linalg.py
+++ b/numpy/linalg/linalg.py
@@ -2613,12 +2613,13 @@ def norm(x, ord=None, axis=None, keepdims=False):
 
 # multi_dot
 
-def _multidot_dispatcher(arrays):
-    return arrays
+def _multidot_dispatcher(arrays, *, out=None):
+    yield from arrays
+    yield out
 
 
 @array_function_dispatch(_multidot_dispatcher)
-def multi_dot(arrays):
+def multi_dot(arrays, *, out=None):
     """
     Compute the dot product of two or more arrays in a single function call,
     while automatically selecting the fastest evaluation order.
@@ -2642,6 +2643,15 @@
         If the first argument is 1-D it is treated as row vector.
         If the last argument is 1-D it is treated as column vector.
         The other arguments must be 2-D.
+    out : ndarray, optional
+        Output argument. This must have the exact kind that would be returned
+        if it was not used. In particular, it must have the right type, must be
+        C-contiguous, and its dtype must be the dtype that would be returned
+        for `dot(a, b)`. This is a performance feature. Therefore, if these
+        conditions are not met, an exception is raised, instead of attempting
+        to be flexible.
+
+        .. versionadded:: 1.19.0
 
     Returns
     -------
@@ -2699,7 +2709,7 @@
     if n < 2:
         raise ValueError("Expecting at least two arrays.")
     elif n == 2:
-        return dot(arrays[0], arrays[1])
+        return dot(arrays[0], arrays[1], out=out)
 
     arrays = [asanyarray(a) for a in arrays]
 
@@ -2715,10 +2725,10 @@
 
     # _multi_dot_three is much faster than _multi_dot_matrix_chain_order
    if n == 3:
-        result = _multi_dot_three(arrays[0], arrays[1], arrays[2])
+        result = _multi_dot_three(arrays[0], arrays[1], arrays[2], out=out)
     else:
         order = _multi_dot_matrix_chain_order(arrays)
-        result = _multi_dot(arrays, order, 0, n - 1)
+        result = _multi_dot(arrays, order, 0, n - 1, out=out)
 
     # return proper shape
     if ndim_first == 1 and ndim_last == 1:
@@ -2729,7 +2739,7 @@
     return result
 
 
-def _multi_dot_three(A, B, C):
+def _multi_dot_three(A, B, C, out=None):
     """
     Find the best order for three arrays and do the multiplication.
 
@@ -2745,9 +2755,9 @@
     cost2 = a1b0 * c1 * (a0 + b1c0)
 
     if cost1 < cost2:
-        return dot(dot(A, B), C)
+        return dot(dot(A, B), C, out=out)
     else:
-        return dot(A, dot(B, C))
+        return dot(A, dot(B, C), out=out)
 
 
 def _multi_dot_matrix_chain_order(arrays, return_costs=False):
@@ -2791,10 +2801,14 @@
     return (s, m) if return_costs else s
 
 
-def _multi_dot(arrays, order, i, j):
+def _multi_dot(arrays, order, i, j, out=None):
     """Actually do the multiplication with the given order."""
     if i == j:
+        # the initial call with non-None out should never get here
+        assert out is None
+
         return arrays[i]
     else:
         return dot(_multi_dot(arrays, order, i, order[i, j]),
-                   _multi_dot(arrays, order, order[i, j] + 1, j))
+                   _multi_dot(arrays, order, order[i, j] + 1, j),
+                   out=out)
diff --git a/numpy/linalg/tests/test_linalg.py b/numpy/linalg/tests/test_linalg.py
index dae4ef61e..3f3bf9f70 100644
--- a/numpy/linalg/tests/test_linalg.py
+++ b/numpy/linalg/tests/test_linalg.py
@@ -1930,6 +1930,41 @@ class TestMultiDot:
         # the result should be a scalar
         assert_equal(multi_dot([A1d, B, C, D1d]).shape, ())
 
+    def test_three_arguments_and_out(self):
+        # multi_dot with three arguments uses a fast hand coded algorithm to
+        # determine the optimal order. Therefore test it separately.
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+        C = np.random.random((6, 2))
+
+        out = np.zeros((6, 2))
+        ret = multi_dot([A, B, C], out=out)
+        assert out is ret
+        assert_almost_equal(out, A.dot(B).dot(C))
+        assert_almost_equal(out, np.dot(A, np.dot(B, C)))
+
+    def test_two_arguments_and_out(self):
+        # separate code path with two arguments
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+        out = np.zeros((6, 6))
+        ret = multi_dot([A, B], out=out)
+        assert out is ret
+        assert_almost_equal(out, A.dot(B))
+        assert_almost_equal(out, np.dot(A, B))
+
+    def test_dynamic_programming_optimization_and_out(self):
+        # multi_dot with four or more arguments uses the dynamic programming
+        # optimization and therefore deserves a separate test
+        A = np.random.random((6, 2))
+        B = np.random.random((2, 6))
+        C = np.random.random((6, 2))
+        D = np.random.random((2, 1))
+        out = np.zeros((6, 1))
+        ret = multi_dot([A, B, C, D], out=out)
+        assert out is ret
+        assert_almost_equal(out, A.dot(B).dot(C).dot(D))
+
     def test_dynamic_programming_logic(self):
         # Test for the dynamic programming part
         # This test is directly taken from Cormen page 376.
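The tests above also serve as usage documentation for the new keyword. As a minimal standalone sketch (the shapes are arbitrary, chosen only for illustration), the `out` buffer must already have the final shape and dtype of the chained product:

    import numpy as np
    from numpy.linalg import multi_dot

    A = np.random.random((10, 100))
    B = np.random.random((100, 5))
    C = np.random.random((5, 50))

    # Preallocate a C-contiguous float64 buffer with the shape of A @ B @ C;
    # multi_dot writes the result into it and returns the same array.
    out = np.empty((10, 50))
    ret = multi_dot([A, B, C], out=out)
    assert ret is out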
diff --git a/numpy/ma/core.py b/numpy/ma/core.py
index a5e59bb74..a7214f9bf 100644
--- a/numpy/ma/core.py
+++ b/numpy/ma/core.py
@@ -285,8 +285,10 @@ def _extremum_fill_value(obj, extremum, extremum_name):
     def _scalar_fill_value(dtype):
         try:
             return extremum[dtype]
-        except KeyError:
-            raise TypeError(f"Unsuitable type {dtype} for calculating {extremum_name}.")
+        except KeyError as e:
+            raise TypeError(
+                f"Unsuitable type {dtype} for calculating {extremum_name}."
+            ) from None
 
     dtype = _get_dtype_of(obj)
     return _recursive_fill_value(dtype, _scalar_fill_value)
diff --git a/numpy/random/_generator.pyx b/numpy/random/_generator.pyx
index b976d51c6..27cb2859e 100644
--- a/numpy/random/_generator.pyx
+++ b/numpy/random/_generator.pyx
@@ -4007,10 +4007,12 @@ cdef class Generator:
         #     return val
 
         cdef np.npy_intp k, totsize, i, j
-        cdef np.ndarray alpha_arr, val_arr
+        cdef np.ndarray alpha_arr, val_arr, alpha_csum_arr
+        cdef double csum
         cdef double *alpha_data
+        cdef double *alpha_csum_data
         cdef double *val_data
-        cdef double acc, invacc
+        cdef double acc, invacc, v
 
         k = len(alpha)
         alpha_arr = <np.ndarray>np.PyArray_FROMANY(
@@ -4034,17 +4036,74 @@
 
         i = 0
         totsize = np.PyArray_SIZE(val_arr)
-        with self.lock, nogil:
-            while i < totsize:
-                acc = 0.0
-                for j in range(k):
-                    val_data[i+j] = random_standard_gamma(&self._bitgen,
-                                                          alpha_data[j])
-                    acc = acc + val_data[i + j]
-                invacc = 1/acc
-                for j in range(k):
-                    val_data[i + j] = val_data[i + j] * invacc
-                i = i + k
+
+        # Select one of the following two algorithms for the generation
+        #  of Dirichlet random variates (RVs)
+        #
+        # A) Small alpha case: Use the stick-breaking approach with beta
+        #    random variates (RVs).
+        # B) Standard case: Perform unit normalisation of a vector
+        #    of gamma random variates
+        #
+        # A) prevents NaNs resulting from 0/0 that may occur in B)
+        #    when all values in the vector ':math:\\alpha' are smaller
+        #    than 1, then there is a nonzero probability that all
+        #    generated gamma RVs will be 0. When that happens, the
+        #    normalization process ends up computing 0/0, giving nan. A)
+        #    does not use divisions, so that a situation in which 0/0 has
+        #    to be computed cannot occur. A) is slower than B) as
+        #    generation of beta RVs is slower than generation of gamma
+        #    RVs. A) is selected whenever `alpha.max() < t`, where `t <
+        #    1` is a threshold that controls the probability of
+        #    generating a NaN value when B) is used. For a given
+        #    threshold `t` this probability can be bounded by
+        #    `gammainc(t, d)` where `gammainc` is the regularized
+        #    incomplete gamma function and `d` is the smallest positive
+        #    floating point number that can be represented with a given
+        #    precision. For the chosen threshold `t=0.1` this probability
+        #    is smaller than `1.8e-31` for double precision floating
+        #    point numbers.
+
+        if (k > 0) and (alpha_arr.max() < 0.1):
+            # Small alpha case: Use stick-breaking approach with beta
+            # random variates (RVs).
+            # alpha_csum_data will hold the cumulative sum, right to
+            # left, of alpha_arr.
+            # Use a numpy array for memory management only. We could just as
+            # well have malloc'd alpha_csum_data. alpha_arr is a C-contiguous
+            # double array, therefore so is alpha_csum_arr.
+            alpha_csum_arr = np.empty_like(alpha_arr)
+            alpha_csum_data = <double*>np.PyArray_DATA(alpha_csum_arr)
+            csum = 0.0
+            for j in range(k - 1, -1, -1):
+                csum += alpha_data[j]
+                alpha_csum_data[j] = csum
+
+            with self.lock, nogil:
+                while i < totsize:
+                    acc = 1.
+                    for j in range(k - 1):
+                        v = random_beta(&self._bitgen, alpha_data[j],
+                                        alpha_csum_data[j + 1])
+                        val_data[i + j] = acc * v
+                        acc *= (1. - v)
+                    val_data[i + k - 1] = acc
+                    i = i + k
+
+        else:
+            # Standard case: Unit normalisation of a vector of gamma random
+            # variates
+            with self.lock, nogil:
+                while i < totsize:
+                    acc = 0.
+                    for j in range(k):
+                        val_data[i + j] = random_standard_gamma(&self._bitgen,
+                                                                alpha_data[j])
+                        acc = acc + val_data[i + j]
+                    invacc = 1. / acc
+                    for j in range(k):
+                        val_data[i + j] = val_data[i + j] * invacc
+                    i = i + k
 
         return diric
diff --git a/numpy/random/_pcg64.pyx b/numpy/random/_pcg64.pyx
index 05d27db5c..605aae4bc 100644
--- a/numpy/random/_pcg64.pyx
+++ b/numpy/random/_pcg64.pyx
@@ -38,7 +38,7 @@ cdef double pcg64_double(void* st) nogil:
 
 cdef class PCG64(BitGenerator):
     """
-    PCG64(seed_seq=None)
+    PCG64(seed=None)
 
     BitGenerator for the PCG-64 pseudo-random number generator.
diff --git a/numpy/random/tests/test_generator_mt19937.py b/numpy/random/tests/test_generator_mt19937.py
index 08b44e4db..a28b7ca11 100644
--- a/numpy/random/tests/test_generator_mt19937.py
+++ b/numpy/random/tests/test_generator_mt19937.py
@@ -1103,6 +1103,31 @@ class TestRandomDist:
                                       size=(3, 2))
         assert_array_almost_equal(non_contig, contig)
 
+    def test_dirichlet_small_alpha(self):
+        eps = 1.0e-9  # 1.0e-10 -> runtime x 10; 1e-11 -> runtime x 200, etc.
+        alpha = eps * np.array([1., 1.0e-3])
+        random = Generator(MT19937(self.seed))
+        actual = random.dirichlet(alpha, size=(3, 2))
+        expected = np.array([
+            [[1., 0.],
+             [1., 0.]],
+            [[1., 0.],
+             [1., 0.]],
+            [[1., 0.],
+             [1., 0.]]
+        ])
+        assert_array_almost_equal(actual, expected, decimal=15)
+
+    @pytest.mark.slow
+    def test_dirichlet_moderately_small_alpha(self):
+        # Use alpha.max() < 0.1 to trigger stick breaking code path
+        alpha = np.array([0.02, 0.04, 0.03])
+        exact_mean = alpha / alpha.sum()
+        random = Generator(MT19937(self.seed))
+        sample = random.dirichlet(alpha, size=20000000)
+        sample_mean = sample.mean(axis=0)
+        assert_allclose(sample_mean, exact_mean, rtol=1e-3)
+
     def test_exponential(self):
         random = Generator(MT19937(self.seed))
         actual = random.exponential(1.1234, size=(3, 2))
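For readers following the small-alpha branch added to `_generator.pyx` above, the stick-breaking construction can be mirrored at the NumPy level. The sketch below is only an illustration of the same idea (the function name `dirichlet_stick_breaking` is hypothetical and `Generator.beta` stands in for the C-level `random_beta` loop); it is not the shipped implementation:

    import numpy as np

    def dirichlet_stick_breaking(rng, alpha):
        # Right-to-left cumulative sum of alpha, mirroring alpha_csum_data.
        alpha = np.asarray(alpha, dtype=np.float64)
        csum = np.cumsum(alpha[::-1])[::-1]
        out = np.empty_like(alpha)
        acc = 1.0  # remaining length of the unit "stick"
        for j in range(alpha.size - 1):
            v = rng.beta(alpha[j], csum[j + 1])
            out[j] = acc * v      # break off a piece of the stick
            acc *= 1.0 - v
        out[-1] = acc             # the last component gets the remainder
        return out

    rng = np.random.default_rng(12345)
    print(dirichlet_stick_breaking(rng, [0.02, 0.04, 0.03]))

Because no division is performed, the 0/0 situation described in the code comment above cannot arise, even when every gamma draw in the standard path would have underflowed to zero.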