Docstring update: random

author: Pauli Virtanen <pav@iki.fi> 2009-10-02 19:37:07 +0000
committer: Pauli Virtanen <pav@iki.fi> 2009-10-02 19:37:07 +0000
commit: c6e430d844ce869ca419b8ab2fb568fa0d11f809 (patch)
tree: b81880d49dcd9535008a90794cd3e565deb27ccc /numpy/random
parent: 094a81e1352fb6b3a7a614fc8df23d0080cf7cb5 (diff)
download: numpy-c6e430d844ce869ca419b8ab2fb568fa0d11f809.tar.gz
1 files changed, 903 insertions, 70 deletions
diff --git a/numpy/random/mtrand/mtrand.pyx b/numpy/random/mtrand/mtrand.pyx
index 3473c2a80..e98414ae6 100644
--- a/numpy/random/mtrand/mtrand.pyx
+++ b/numpy/random/mtrand/mtrand.pyx
@@ -520,26 +520,34 @@ cdef class RandomState:
     """
     RandomState(seed=None)
 
-    Container for the Mersenne Twister PRNG.
+    Container for the Mersenne Twister pseudo-random number generator.
 
     `RandomState` exposes a number of methods for generating random numbers
     drawn from a variety of probability distributions. In addition to the
     distribution-specific arguments, each method takes a keyword argument
     `size` that defaults to ``None``. If `size` is ``None``, then a single
     value is generated and returned. If `size` is an integer, then a 1-D
-    numpy array filled with generated values is returned. If size is a tuple,
-    then a numpy array with that shape is filled and returned.
+    array filled with generated values is returned. If `size` is a tuple,
+    then an array with that shape is filled and returned.
 
     Parameters
     ----------
-    seed : array_like, int, optional
-        Random seed initializing the PRNG.
+    seed : int or array_like, optional
+        Random seed initializing the pseudo-random number generator.
         Can be an integer, an array (or other sequence) of integers of
-        any length, or ``None``.
+        any length, or ``None`` (the default).
         If `seed` is ``None``, then `RandomState` will try to read data from
         ``/dev/urandom`` (or the Windows analogue) if available or seed from
         the clock otherwise.
 
+    Notes
+    -----
+    The Python stdlib module "random" also contains a Mersenne Twister
+    pseudo-random number generator with a number of methods that are similar
+    to the ones available in `RandomState`. `RandomState`, besides being
+    NumPy-aware, has the advantage that it provides a much larger number
+    of probability distributions to choose from.
+
     """
     cdef rk_state *internal_state
 
@@ -559,10 +567,17 @@ cdef class RandomState:
 
         Seed the generator.
 
-        seed can be an integer, an array (or other sequence) of integers of any
-        length, or None. If seed is None, then RandomState will try to read data
-        from /dev/urandom (or the Windows analogue) if available or seed from
-        the clock otherwise.
+        This method is called when `RandomState` is initialized. It can be
+        called again to re-seed the generator. For details, see `RandomState`.
+
+        Parameters
+        ----------
+        seed : int or array_like, optional
+            Seed for `RandomState`.
+
+        See Also
+        --------
+        RandomState
 
         """
         cdef rk_error errcode
@@ -585,21 +600,29 @@ cdef class RandomState:
 
         Return a tuple representing the internal state of the generator.
 
+        For more details, see `set_state`.
+
         Returns
         -------
-        out : tuple(string, list of 624 integers, int, int, float)
+        out : tuple(str, ndarray of 624 uints, int, int, float)
             The returned tuple has the following items:
 
-            1. the string 'MT19937'
-            2. a list of 624 integer keys
-            3. an integer pos
-            4. an integer has_gauss
-            5. and a float cached_gaussian
+            1. the string 'MT19937'.
+            2. a 1-D array of 624 unsigned integer keys.
+            3. an integer ``pos``.
+            4. an integer ``has_gauss``.
+            5. a float ``cached_gaussian``.
 
         See Also
         --------
         set_state
 
+        Notes
+        -----
+        `set_state` and `get_state` are not needed to work with any of the
+        random distributions in NumPy. If the internal state is manually altered,
+        the user should know exactly what he/she is doing.
+
         """
         cdef ndarray state "arrayObject_state"
         state = <ndarray>np.empty(624, np.uint)
@@ -612,18 +635,21 @@ cdef class RandomState:
         """
         set_state(state)
 
-        Set the state from a tuple.
+        Set the internal state of the generator from a tuple.
+
+        For use if one has reason to manually (re-)set the internal state of the
+        "Mersenne Twister"[1]_ pseudo-random number generating algorithm.
 
         Parameters
         ----------
-        state : tuple(string, list of 624 ints, int, int, float)
-            The `state` tuple is made up of
+        state : tuple(str, ndarray of 624 uints, int, int, float)
+            The `state` tuple has the following items:
 
-            1. the string 'MT19937'
-            2. a list of 624 integer keys
-            3. an integer pos
-            4. an integer has_gauss
-            5. and a float for the cached_gaussian
+            1. the string 'MT19937', specifying the Mersenne Twister algorithm.
+            2. a 1-D array of 624 unsigned integers ``keys``.
+            3. an integer ``pos``.
+            4. an integer ``has_gauss``.
+            5. a float ``cached_gaussian``.
 
         Returns
         -------
@@ -636,10 +662,20 @@ cdef class RandomState:
 
         Notes
         -----
-        For backwards compatibility, the following form is also accepted
-        although it is missing some information about the cached Gaussian value.
+        `set_state` and `get_state` are not needed to work with any of the
+        random distributions in NumPy. If the internal state is manually altered,
+        the user should know exactly what he/she is doing.
 
-        state = ('MT19937', int key[624], int pos)
+        For backwards compatibility, the form (str, array of 624 uints, int) is
+        also accepted although it is missing some information about the cached
+        Gaussian value: ``state = ('MT19937', keys, pos)``.
+
+        References
+        ----------
+        .. [1] M. Matsumoto and T. Nishimura, "Mersenne Twister: A
+           623-dimensionally equidistributed uniform pseudorandom number
+           generator," *ACM Trans. on Modeling and Computer Simulation*,
+           Vol. 8, No. 1, pp. 3-30, Jan. 1998.
 
         """
         cdef ndarray obj "arrayObject_obj"
@@ -682,15 +718,39 @@ cdef class RandomState:
 
         Return random floats in the half-open interval [0.0, 1.0).
 
+        Results are from the "continuous uniform" distribution over the
+        stated interval.  To sample :math:`Unif[a, b), b > a` multiply
+        the output of `random_sample` by `(b-a)` and add `a`::
+
+          (b - a) * random_sample() + a
+
         Parameters
         ----------
-        size : shape tuple, optional
-            Defines the shape of the returned array of random floats.
+        size : int or tuple of ints, optional
+            Defines the shape of the returned array of random floats. If None
+            (the default), returns a single float.
 
         Returns
         -------
-        out : ndarray, floats
-            Array of random of floats with shape of `size`.
+        out : float or ndarray of floats
+            Array of random floats of shape `size` (unless ``size=None``, in which
+            case a single float is returned).
+
+        Examples
+        --------
+        >>> np.random.random_sample()
+        0.47108547995356098
+        >>> type(np.random.random_sample())
+        <type 'float'>
+        >>> np.random.random_sample((5,))
+        array([ 0.30220482,  0.86820401,  0.1654503 ,  0.11659149,  0.54323428])
+
+        Three-by-two array of random numbers from [-5, 0):
+
+        >>> 5 * np.random.random_sample((3, 2)) - 5
+        array([[-3.99149989, -0.52338984],
+               [-2.99091858, -0.79479508],
+               [-1.23204345, -1.75224494]])
 
         """
         return cont0_array(self.internal_state, rk_double, size)
@@ -727,9 +787,50 @@ cdef class RandomState:
         """
         randint(low, high=None, size=None)
 
-        Return random integers x such that low <= x < high.
+        Return random integers from `low` (inclusive) to `high` (exclusive).
 
-        If high is None, then 0 <= x < low.
+        Return random integers from the "discrete uniform" distribution in the
+        "half-open" interval [`low`, `high`). If `high` is None (the default),
+        then results are from [0, `low`).
+
+        Parameters
+        ----------
+        low : int
+            Lowest (signed) integer to be drawn from the distribution (unless
+            ``high=None``, in which case this parameter is one above the
+            *highest* such integer).
+        high : int, optional
+            If provided, one above the largest (signed) integer to be drawn
+            from the distribution (see above for behavior if ``high=None``).
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single int is
+            returned.
+
+        Returns
+        -------
+        out : int or ndarray of ints
+            `size`-shaped array of random integers from the appropriate
+            distribution, or a single such random int if `size` not provided.
+
+        See Also
+        --------
+        random.random_integers : similar to `randint`, only for the closed
+            interval [`low`, `high`], and 1 is the lowest value if `high` is
+            omitted. In particular, this other one is the one to use to generate
+            uniformly distributed discrete non-integers.
+
+        Examples
+        --------
+        >>> np.random.randint(2, size=10)
+        array([1, 0, 0, 0, 1, 1, 0, 0, 1, 0])
+        >>> np.random.randint(1, size=10)
+        array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])
+
+        Generate a 2 x 4 array of ints between 0 and 4, inclusive:
+
+        >>> np.random.randint(5, size=(2, 4))
+        array([[4, 0, 2, 1],
+               [3, 2, 2, 0]])
 
         """
         cdef long lo, hi, diff
@@ -920,14 +1021,53 @@ cdef class RandomState:
 
     def randn(self, *args):
         """
-        randn(d0, d1, ..., dn)
+        randn([d1, ..., dn])
+
+        Return a sample (or samples) from the "standard normal" distribution.
+
+        If positive, int_like or int-convertible arguments are provided,
+        `randn` generates an array of shape ``(d1, ..., dn)``, filled
+        with random floats sampled from a univariate "normal" (Gaussian)
+        distribution of mean 0 and variance 1 (if any of the :math:`d_i` are
+        floats, they are first converted to integers by truncation). A single
+        float randomly sampled from the distribution is returned if no
+        argument is provided.
+
+        This is a convenience function.  If you want an interface that takes a
+        tuple as the first argument, use `numpy.random.standard_normal` instead.
+
+        Parameters
+        ----------
+        d1, ..., dn : `n` ints, optional
+            The dimensions of the returned array, should be all positive.
+
+        Returns
+        -------
+        Z : ndarray or float
+            A ``(d1, ..., dn)``-shaped array of floating-point samples from
+            the standard normal distribution, or a single such float if
+            no parameters were supplied.
+
+        See Also
+        --------
+        random.standard_normal : Similar, but takes a tuple as its argument.
 
-        Returns zero-mean, unit-variance Gaussian random numbers in an
-        array of shape (d0, d1, ..., dn).
+        Notes
+        -----
+        For random samples from :math:`N(\\mu, \\sigma^2)`, use:
+
+        ``sigma * np.random.randn(...) + mu``
+
+        Examples
+        --------
+        >>> np.random.randn()
+        2.1923875335537315 #random
+
+        Two-by-four array of samples from N(3, 6.25):
 
-        Note:  This is a convenience function. If you want an
-                    interface that takes a tuple as the first argument
-                    use numpy.random.standard_normal(shape_tuple).
+        >>> 2.5 * np.random.randn(2, 4) + 3
+        array([[-4.49401501,  4.00950034, -1.81814867,  7.29718677],  #random
+               [ 0.39924804,  4.68456316,  4.99394529,  4.84057254]]) #random
 
         """
         if len(args) == 0:
@@ -939,9 +1079,72 @@ cdef class RandomState:
         """
         random_integers(low, high=None, size=None)
 
-        Return random integers x such that low <= x <= high.
+        Return random integers between `low` and `high`, inclusive.
+
+        Return random integers from the "discrete uniform" distribution in the
+        closed interval [`low`, `high`].  If `high` is None (the default),
+        then results are from [1, `low`].
+
+        Parameters
+        ----------
+        low : int
+            Lowest (signed) integer to be drawn from the distribution (unless
+            ``high=None``, in which case this parameter is the *highest* such
+            integer).
+        high : int, optional
+            If provided, the largest (signed) integer to be drawn from the
+            distribution (see above for behavior if ``high=None``).
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single int is returned.
+
+        Returns
+        -------
+        out : int or ndarray of ints
+            `size`-shaped array of random integers from the appropriate
+            distribution, or a single such random int if `size` not provided.
+
+        See Also
+        --------
+        random.randint : Similar to `random_integers`, only for the half-open
+            interval [`low`, `high`), and 0 is the lowest value if `high` is
+            omitted.
+
+        Notes
+        -----
+        To sample from N evenly spaced floating-point numbers between a and b,
+        use::
+
+          a + (b - a) * (np.random.random_integers(N) - 1) / (N - 1.)
+
+        Examples
+        --------
+        >>> np.random.random_integers(5)
+        4
+        >>> type(np.random.random_integers(5))
+        <type 'int'>
+        >>> np.random.random_integers(5, size=(3, 2))
+        array([[5, 4],
+               [3, 3],
+               [4, 5]])
+
+        Choose five random numbers from the set of five evenly-spaced
+        numbers between 0 and 2.5, inclusive (*i.e.*, from the set
+        :math:`\\{0, 5/8, 10/8, 15/8, 20/8\\}`):
 
-        If high is None, then 1 <= x <= low.
+        >>> 2.5 * (np.random.random_integers(5, size=(5,)) - 1) / 4.
+        array([ 0.625,  1.25 ,  0.625,  0.625,  2.5  ])
+
+        Roll two six sided dice 1000 times and sum the results:
+
+        >>> d1 = np.random.random_integers(1, 6, 1000)
+        >>> d2 = np.random.random_integers(1, 6, 1000)
+        >>> dsums = d1 + d2
+
+        Display results as a histogram:
+
+        >>> import matplotlib.pyplot as plt
+        >>> count, bins, ignored = plt.hist(dsums, 11, normed=True)
+        >>> plt.show()
 
         """
         if high is None:
@@ -958,15 +1161,26 @@ cdef class RandomState:
 
         Parameters
         ----------
-        size : int, shape tuple, optional
-            Returns the number of samples required to satisfy the `size` parameter.
-            If not given or 'None' indicates to return one sample.
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single value is
+            returned.
 
         Returns
         -------
-        out : float, ndarray
-            Samples the Standard Normal distribution with a shape satisfying the
-            `size` parameter.
+        out : float or ndarray
+            Drawn samples.
+
+        Examples
+        --------
+        >>> s = np.random.standard_normal(8000)
+        >>> s
+        array([ 0.6888893 ,  0.78096262, -0.89086505, ...,  0.49876311, #random
+               -0.38672696, -0.4685006 ])                               #random
+        >>> s.shape
+        (8000,)
+        >>> s = np.random.standard_normal(size=(3, 4, 2))
+        >>> s.shape
+        (3, 4, 2)
 
         """
         return cont0_array(self.internal_state, rk_gauss, size)
@@ -1188,7 +1402,26 @@ cdef class RandomState:
         """
         standard_exponential(size=None)
 
-        Standard exponential distribution (scale=1).
+        Draw samples from the standard exponential distribution.
+
+        `standard_exponential` is identical to the exponential distribution
+        with a scale parameter of 1.
+
+        Parameters
+        ----------
+        size : int or tuple of ints
+            Shape of the output.
+
+        Returns
+        -------
+        out : float or ndarray
+            Drawn samples.
+
+        Examples
+        --------
+        Output a 3x8000 array:
+
+        >>> n = np.random.standard_exponential((3, 8000))
 
         """
         return cont0_array(self.internal_state, rk_standard_exponential, size)
@@ -1197,7 +1430,67 @@ cdef class RandomState:
         """
         standard_gamma(shape, size=None)
 
-        Standard Gamma distribution.
+        Draw samples from a Standard Gamma distribution.
+
+        Samples are drawn from a Gamma distribution with specified parameters,
+        shape (sometimes designated "k") and scale=1.
+
+        Parameters
+        ----------
+        shape : float
+            Parameter, should be > 0.
+        size : int or tuple of ints
+            Output shape.  If the given shape is, e.g., ``(m, n, k)``, then
+            ``m * n * k`` samples are drawn.
+
+        Returns
+        -------
+        samples : ndarray or scalar
+            The drawn samples.
+
+        See Also
+        --------
+        scipy.stats.distributions.gamma : probability density function,
+            distribution or cumulative density function, etc.
+
+        Notes
+        -----
+        The probability density for the Gamma distribution is
+
+        .. math:: p(x) = x^{k-1}\\frac{e^{-x/\\theta}}{\\theta^k\\Gamma(k)},
+
+        where :math:`k` is the shape and :math:`\\theta` the scale,
+        and :math:`\\Gamma` is the Gamma function.
+
+        The Gamma distribution is often used to model the times to failure of
+        electronic components, and arises naturally in processes for which the
+        waiting times between Poisson distributed events are relevant.
+
+        References
+        ----------
+        .. [1] Weisstein, Eric W. "Gamma Distribution." From MathWorld--A
+               Wolfram Web Resource.
+               http://mathworld.wolfram.com/GammaDistribution.html
+        .. [2] Wikipedia, "Gamma-distribution",
+               http://en.wikipedia.org/wiki/Gamma-distribution
+
+        Examples
+        --------
+        Draw samples from the distribution:
+
+        >>> shape, scale = 2., 1. # mean and width
+        >>> s = np.random.standard_gamma(shape, 1000000)
+
+        Display the histogram of the samples, along with
+        the probability density function:
+
+        >>> import matplotlib.pyplot as plt
+        >>> import scipy.special as sps
+        >>> count, bins, ignored = plt.hist(s, 50, normed=True)
+        >>> y = bins**(shape-1) * ((np.exp(-bins/scale))/ \\
+        ...                       (sps.gamma(shape) * scale**shape))
+        >>> plt.plot(bins, y, linewidth=2, color='r')
+        >>> plt.show()
 
         """
         cdef ndarray oshape
@@ -1413,7 +1706,64 @@ cdef class RandomState:
         """
         noncentral_f(dfnum, dfden, nonc, size=None)
 
-        Noncentral F distribution.
+        Draw samples from the noncentral F distribution.
+
+        Samples are drawn from an F distribution with specified parameters,
+        `dfnum` (degrees of freedom in numerator) and `dfden` (degrees of
+        freedom in denominator), where both parameters > 1.
+        `nonc` is the non-centrality parameter.
+
+        Parameters
+        ----------
+        dfnum : int
+            Parameter, should be > 1.
+        dfden : int
+            Parameter, should be > 1.
+        nonc : float
+            Parameter, should be >= 0.
+        size : int or tuple of ints
+            Output shape. If the given shape is, e.g., ``(m, n, k)``, then
+            ``m * n * k`` samples are drawn.
+
+        Returns
+        -------
+        samples : scalar or ndarray
+            Drawn samples.
+
+        Notes
+        -----
+        When calculating the power of an experiment (power = probability of
+        rejecting the null hypothesis when a specific alternative is true) the
+        non-central F statistic becomes important.  When the null hypothesis is
+        true, the F statistic follows a central F distribution. When the null
+        hypothesis is not true, then it follows a non-central F distribution.
+
+        References
+        ----------
+        Weisstein, Eric W. "Noncentral F-Distribution." From MathWorld--A Wolfram
+        Web Resource.  http://mathworld.wolfram.com/NoncentralF-Distribution.html
+
+        Wikipedia, "Noncentral F distribution",
+        http://en.wikipedia.org/wiki/Noncentral_F-distribution
+
+        Examples
+        --------
+        In a study, testing for a specific alternative to the null hypothesis
+        requires use of the Noncentral F distribution. We need to calculate the
+        area in the tail of the distribution that exceeds the value of the F
+        distribution for the null hypothesis.  We'll plot the two probability
+        distributions for comparison.
+
+        >>> import matplotlib.pyplot as plt
+        >>> dfnum = 3 # between group deg of freedom
+        >>> dfden = 20 # within groups degrees of freedom
+        >>> nonc = 3.0
+        >>> nc_vals = np.random.noncentral_f(dfnum, dfden, nonc, 1000000)
+        >>> NF = np.histogram(nc_vals, bins=50, normed=True)
+        >>> c_vals = np.random.f(dfnum, dfden, 1000000)
+        >>> F = np.histogram(c_vals, bins=50, normed=True)
+        >>> plt.plot(F[1][1:], F[0])
+        >>> plt.plot(NF[1][1:], NF[0])
+        >>> plt.show()
 
         """
         cdef ndarray odfnum, odfden, ononc
@@ -1539,12 +1889,62 @@ cdef class RandomState:
         Parameters
         ----------
         df : int
-            Degrees of freedom.
+            Degrees of freedom, should be >= 1.
         nonc : float
-            Non-centrality.
-        size : tuple of ints
+            Non-centrality, should be > 0.
+        size : int or tuple of ints
             Shape of the output.
 
+        Notes
+        -----
+        The probability density function for the noncentral Chi-square distribution
+        is
+
+        .. math:: P(x;df,nonc) = \\sum^{\\infty}_{i=0}
+                               \\frac{e^{-nonc/2}(nonc/2)^{i}}{i!}P_{Y_{df+2i}}(x),
+
+        where :math:`Y_{q}` is the Chi-square with q degrees of freedom.
+
+        In Holla (1970) [1]_, it is noted that the noncentral chi-square is
+        useful in bombing and coverage problems, the probability of killing the
+        point target being given by the noncentral chi-squared distribution.
+
+        References
+        ----------
+        .. [1] M.S. Holla, "On a noncentral chi-square distribution in the
+               analysis of weapon systems effectiveness", Metrika, Volume 15,
+               Number 1 / December, 1970.
+        .. [2] Wikipedia, "Noncentral chi-square distribution"
+               http://en.wikipedia.org/wiki/Noncentral_chi-square_distribution
+
+        Examples
+        --------
+        Draw values from the distribution and plot the histogram
+
+        >>> import matplotlib.pyplot as plt
+        >>> values = plt.hist(np.random.noncentral_chisquare(3, 20, 100000),
+        ...                   bins=200, normed=True)
+        >>> plt.show()
+
+        Draw values from a noncentral chisquare with very small noncentrality,
+        and compare to a chisquare.
+
+        >>> plt.figure()
+        >>> values = plt.hist(np.random.noncentral_chisquare(3, .0000001, 100000),
+        ...                   bins=np.arange(0., 25, .1), normed=True)
+        >>> values2 = plt.hist(np.random.chisquare(3, 100000),
+        ...                    bins=np.arange(0., 25, .1), normed=True)
+        >>> plt.plot(values[1][0:-1], values[0]-values2[0], 'ob')
+        >>> plt.show()
+
+        Demonstrate how large values of non-centrality lead to a more symmetric
+        distribution.
+
+        >>> plt.figure()
+        >>> values = plt.hist(np.random.noncentral_chisquare(3, 20, 100000),
+        ...                   bins=200, normed=True)
+        >>> plt.show()
+
         """
         cdef ndarray odf, ononc
         cdef double fdf, fnonc
@@ -1573,7 +1973,59 @@ cdef class RandomState:
         """
         standard_cauchy(size=None)
 
-        Standard Cauchy with mode=0.
+        Standard Cauchy distribution with mode = 0.
+
+        Also known as the Lorentz distribution.
+
+        Parameters
+        ----------
+        size : int or tuple of ints
+            Shape of the output.
+
+        Returns
+        -------
+        samples : ndarray or scalar
+            The drawn samples.
+
+        Notes
+        -----
+        The probability density function for the full Cauchy distribution is
+
+        .. math:: P(x; x_0, \\gamma) = \\frac{1}{\\pi \\gamma \\bigl[ 1+
+                  (\\frac{x-x_0}{\\gamma})^2 \\bigr] }
+
+        and the Standard Cauchy distribution just sets :math:`x_0=0` and
+        :math:`\\gamma=1`
+
+        The Cauchy distribution arises in the solution to the driven harmonic
+        oscillator problem, and also describes spectral line broadening. It
+        also describes the distribution of values at which a line tilted at
+        a random angle will cut the x axis.
+
+        When studying hypothesis tests that assume normality, seeing how the
+        tests perform on data from a Cauchy distribution is a good indicator of
+        their sensitivity to a heavy-tailed distribution, since the Cauchy looks
+        very much like a Gaussian distribution, but with heavier tails.
+
+        References
+        ----------
+        .. [1] NIST/SEMATECH e-Handbook of Statistical Methods, "Cauchy
+               Distribution",
+               http://www.itl.nist.gov/div898/handbook/eda/section3/eda3663.htm
+        .. [2] Weisstein, Eric W. "Cauchy Distribution." From MathWorld--A
+               Wolfram Web Resource.
+               http://mathworld.wolfram.com/CauchyDistribution.html
+        .. [3] Wikipedia, "Cauchy distribution"
+               http://en.wikipedia.org/wiki/Cauchy_distribution
+
+        Examples
+        --------
+        Draw samples and plot the distribution:
+
+        >>> import matplotlib.pyplot as plt
+        >>> s = np.random.standard_cauchy(1000000)
+        >>> s = s[(s>-25) & (s<25)]  # truncate distribution so it plots well
+        >>> plt.hist(s, bins=100)
+        >>> plt.show()
 
         """
         return cont0_array(self.internal_state, rk_standard_cauchy, size)
@@ -1584,6 +2036,84 @@ cdef class RandomState:
 
         Standard Student's t distribution with df degrees of freedom.
 
+        A special case of the hyperbolic distribution.
+        As `df` gets large, the result resembles that of the standard normal
+        distribution (`standard_normal`).
+
+        Parameters
+        ----------
+        df : int
+            Degrees of freedom, should be > 0.
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single value is
+            returned.
+
+        Returns
+        -------
+        samples : ndarray or scalar
+            Drawn samples.
+
+        Notes
+        -----
+        The probability density function for the t distribution is
+
+        .. math:: P(x, df) = \\frac{\\Gamma(\\frac{df+1}{2})}{\\sqrt{\\pi df}
+                  \\Gamma(\\frac{df}{2})}\\Bigl( 1+\\frac{x^2}{df} \\Bigr)^{-(df+1)/2}
+
+        The t test is based on an assumption that the data come from a Normal
+        distribution. The t test provides a way to test whether the sample mean
+        (that is the mean calculated from the data) is a good estimate of the true
+        mean.
+
+        The derivation of the t-distribution was first published in 1908 by William
+        Gosset while working for the Guinness Brewery in Dublin. Due to proprietary
+        issues, he had to publish under a pseudonym, and so he used the name
+        Student.
+
+        References
+        ----------
+        .. [1] Dalgaard, Peter, "Introductory Statistics With R",
+               Springer, 2002.
+        .. [2] Wikipedia, "Student's t-distribution"
+               http://en.wikipedia.org/wiki/Student's_t-distribution
+
+        Examples
+        --------
+        From Dalgaard page 83 [1]_, suppose the daily energy intake for 11
+        women in kJ is:
+
+        >>> intake = np.array([5260., 5470, 5640, 6180, 6390, 6515, 6805, 7515, \\
+        ...                    7515, 8230, 8770])
+
+        Does their energy intake deviate systematically from the recommended
+        value of 7725 kJ?
+
+        We have 10 degrees of freedom, so is the sample mean within 95% of the
+        recommended value?
+
+        >>> s = np.random.standard_t(10, size=100000)
+        >>> np.mean(intake)
+        6753.636363636364
+        >>> intake.std(ddof=1)
+        1142.1232221373727
+
+        Calculate the t statistic, setting the ddof parameter to the unbiased
+        value so the divisor in the standard deviation will be degrees of
+        freedom, N-1.
+
+        >>> t = (np.mean(intake)-7725)/(intake.std(ddof=1)/np.sqrt(len(intake)))
+        >>> import matplotlib.pyplot as plt
+        >>> h = plt.hist(s, bins=100, normed=True)
+
+        For a one-sided t-test, how far out in the distribution does the t
+        statistic appear?
+
+        >>> np.sum(s<t) / float(len(s))
+        0.0090699999999999999  #random
+
+        So the p-value is about 0.009: if the null hypothesis were true, a t
+        statistic this extreme would occur less than 1% of the time.
+
         """
         cdef ndarray odf
         cdef double fdf
@@ -1998,15 +2528,13 @@ cdef class RandomState:
         """
         laplace(loc=0.0, scale=1.0, size=None)
 
-        Laplace or double exponential distribution.
-
-        It has the probability density function
-
-        .. math:: f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda}
-                                       \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right).
+        Draw samples from the Laplace or double exponential distribution with
+        specified location (or mean) and scale (decay).
 
         The Laplace distribution is similar to the Gaussian/normal distribution,
-        but is sharper at the peak and has fatter tails.
+        but is sharper at the peak and has fatter tails. It represents the
+        difference between two independent, identically distributed exponential
+        random variables.
 
         Parameters
         ----------
@@ -2015,6 +2543,59 @@ cdef class RandomState:
         scale : float
             :math:`\\lambda`, the exponential decay.
 
+        Notes
+        -----
+        It has the probability density function
+
+        .. math:: f(x; \\mu, \\lambda) = \\frac{1}{2\\lambda}
+                                       \\exp\\left(-\\frac{|x - \\mu|}{\\lambda}\\right).
+
+        The first law of Laplace, from 1774, states that the frequency of an error
+        can be expressed as an exponential function of the absolute magnitude of
+        the error, which leads to the Laplace distribution. For many problems in
+        economics and health sciences, this distribution seems to model the data
+        better than the standard Gaussian distribution.
+
+
+        References
+        ----------
+        .. [1] Abramowitz, M. and Stegun, I. A. (Eds.). Handbook of Mathematical
+               Functions with Formulas, Graphs, and Mathematical Tables, 9th
+               printing.  New York: Dover, 1972.
+
+        .. [2] The Laplace distribution and generalizations
+               By Samuel Kotz, Tomasz J. Kozubowski, Krzysztof Podgorski,
+               Birkhauser, 2001.
+
+        .. [3] Weisstein, Eric W. "Laplace Distribution."
+               From MathWorld--A Wolfram Web Resource.
+               http://mathworld.wolfram.com/LaplaceDistribution.html
+
+        .. [4] Wikipedia, "Laplace distribution",
+               http://en.wikipedia.org/wiki/Laplace_distribution
+
+        Examples
+        --------
+        Draw samples from the distribution
+
+        >>> loc, scale = 0., 1.
+        >>> s = np.random.laplace(loc, scale, 1000)
+
+        Display the histogram of the samples, along with
+        the probability density function:
+
+        >>> import matplotlib.pyplot as plt
+        >>> count, bins, ignored = plt.hist(s, 30, normed=True)
+        >>> x = np.arange(-8., 8., .01)
+        >>> pdf = np.exp(-abs(x-loc)/scale)/(2.*scale)
+        >>> plt.plot(x, pdf)
+
+        Plot Gaussian for comparison:
+
+        >>> g = (1/(scale * np.sqrt(2 * np.pi)) * 
+        ...      np.exp( - (x - loc)**2 / (2 * scale**2) ))
+        >>> plt.plot(x,g)
+
         """
         cdef ndarray oloc, oscale
         cdef double floc, fscale
@@ -2322,7 +2903,7 @@ cdef class RandomState:
         the probability density function:
 
         >>> import matplotlib.pyplot as plt
-        >>> count, bins, ignored = plt.hist(s, 100, normed=True, align='center')
+        >>> count, bins, ignored = plt.hist(s, 100, normed=True, align='mid')
 
         >>> x = np.linspace(min(bins), max(bins), 10000)
         >>> pdf = (np.exp(-(np.log(x) - mu)**2 / (2 * sigma**2))
@@ -2380,7 +2961,55 @@ cdef class RandomState:
         """
         rayleigh(scale=1.0, size=None)
 
-        Rayleigh distribution.
+        Draw samples from a Rayleigh distribution.
+
+        The :math:`\\chi` and Weibull distributions are generalizations of the
+        Rayleigh.
+
+        Parameters
+        ----------
+        scale : scalar
+            Scale, also equals the mode. Should be >= 0.
+        size : int or tuple of ints, optional
+            Shape of the output. Default is None, in which case a single
+            value is returned.
+
+        Notes
+        -----
+        The probability density function for the Rayleigh distribution is
+
+        .. math:: P(x;scale) = \\frac{x}{scale^2}e^{\\frac{-x^2}{2 \\cdotp scale^2}}
+
+        The Rayleigh distribution arises, for example, if the East and North
+        components of the wind velocity are independent zero-mean Gaussian
+        variables with equal variance; the wind speed is then Rayleigh
+        distributed. It is used to model the expected output of wind turbines.
+
+        References
+        ----------
+        .. [1] Brighton Webs Ltd., Rayleigh Distribution,
+               http://www.brighton-webs.co.uk/distributions/rayleigh.asp
+        .. [2] Wikipedia, "Rayleigh distribution"
+               http://en.wikipedia.org/wiki/Rayleigh_distribution
+
+        Examples
+        --------
+        Draw values from the distribution and plot the histogram
+
+        >>> values = hist(np.random.rayleigh(3, 100000), bins=200, normed=True)
+
+        Wave heights tend to follow a Rayleigh distribution. If the mean wave
+        height is 1 meter, what fraction of waves are likely to be larger than 3
+        meters?
+
+        >>> meanvalue = 1
+        >>> modevalue = np.sqrt(2 / np.pi) * meanvalue
+        >>> s = np.random.rayleigh(modevalue, 1000000)
+
+        The percentage of waves larger than 3 meters is:
+
+        >>> 100.*sum(s>3)/1000000.
+        0.087300000000000003
 
         """
         cdef ndarray oscale
@@ -2404,7 +3033,63 @@ cdef class RandomState:
         """
         wald(mean, scale, size=None)
 
-        Wald (inverse Gaussian) distribution.
+        Draw samples from a Wald, or Inverse Gaussian, distribution.
+
+        As the scale approaches infinity, the distribution becomes more like a
+        Gaussian.
+
+        Some references claim that the Wald is an Inverse Gaussian with mean=1, but
+        this is by no means universal.
+
+        The Inverse Gaussian distribution was first studied in relationship to
+        Brownian motion. In 1956 M.C.K. Tweedie used the name Inverse Gaussian
+        because there is an inverse relationship between the time to cover a unit
+        distance and distance covered in unit time.
+
+        Parameters
+        ----------
+        mean : scalar
+            Distribution mean, should be > 0.
+        scale : scalar
+            Scale parameter, should be >= 0.
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single value is
+            returned.
+
+        Returns
+        -------
+        samples : ndarray or scalar
+            Drawn sample, all greater than zero.
+
+        Notes
+        -----
+        The probability density function for the Wald distribution is
+
+        .. math:: P(x;mean,scale) = \\sqrt{\\frac{scale}{2\\pi x^3}}e^
+                                    \\frac{-scale(x-mean)^2}{2\\cdotp mean^2x}
+
+        As noted above, the Inverse Gaussian distribution first arose from
+        attempts to model Brownian motion. It is also a competitor to the
+        Weibull for use in reliability modeling and modeling stock returns and
+        interest rate processes.
+
+        References
+        ----------
+        .. [1] Brighton Webs Ltd., Wald Distribution,
+               http://www.brighton-webs.co.uk/distributions/wald.asp
+        .. [2] Chhikara, Raj S., and Folks, J. Leroy, "The Inverse Gaussian
+               Distribution: Theory, Methodology, and Applications", CRC Press,
+               1988.
+        .. [3] Wikipedia, "Wald distribution"
+               http://en.wikipedia.org/wiki/Wald_distribution
+
+        Examples
+        --------
+        Draw values from the distribution and plot the histogram:
+
+        >>> import matplotlib.pyplot as plt
+        >>> h = plt.hist(np.random.wald(3, 2, 100000), bins=200, normed=True)
+        >>> plt.show()
 
         """
         cdef ndarray omean, oscale
@@ -2434,8 +3119,57 @@ cdef class RandomState:
         """
         triangular(left, mode, right, size=None)
 
-        Triangular distribution starting at left, peaking at mode, and
-        ending at right (left <= mode <= right).
+        Draw samples from the triangular distribution.
+
+        The triangular distribution is a continuous probability distribution with
+        lower limit left, peak at mode, and upper limit right. Unlike the other
+        distributions, these parameters directly define the shape of the pdf.
+
+        Parameters
+        ----------
+        left : scalar
+            Lower limit.
+        mode : scalar
+            The value where the peak of the distribution occurs.
+            The value should fulfill the condition ``left <= mode <= right``.
+        right : scalar
+            Upper limit, should be larger than `left`.
+        size : int or tuple of ints, optional
+            Output shape. Default is None, in which case a single value is
+            returned.
+
+        Returns
+        -------
+        samples : ndarray or scalar
+            The returned samples all lie in the interval [left, right].
+
+        Notes
+        -----
+        The probability density function for the Triangular distribution is
+
+        .. math:: P(x;l, m, r) = \\begin{cases}
+                  \\frac{2(x-l)}{(r-l)(m-l)}& \\text{for $l \\leq x \\leq m$},\\\\
+                  \\frac{2(m-x)}{(r-l)(r-m)}& \\text{for $m \\leq x \\leq r$},\\\\
+                  0& \\text{otherwise}.
+                  \\end{cases}
+
+        The triangular distribution is often used in ill-defined problems where the
+        underlying distribution is not known, but some knowledge of the limits and
+        mode exists. Often it is used in simulations.
+
+        References
+        ----------
+        .. [1] Wikipedia, "Triangular distribution"
+               http://en.wikipedia.org/wiki/Triangular_distribution
+
+        Examples
+        --------
+        Draw values from the distribution and plot the histogram:
+
+        >>> import matplotlib.pyplot as plt
+        >>> h = plt.hist(np.random.triangular(-3, 0, 8, 100000), bins=200,
+        ...              normed=True)
+        >>> plt.show()
 
         """
         cdef ndarray oleft, omode, oright
@@ -2581,7 +3315,65 @@ cdef class RandomState:
         """
         negative_binomial(n, p, size=None)
 
-        Negative Binomial distribution.
+        Draw samples from a negative binomial distribution.
+
+        Samples are drawn from a negative binomial distribution with specified
+        parameters, `n` trials and `p` probability of success where `n` is an
+        integer > 0 and `p` is in the interval [0, 1].
+
+        Parameters
+        ----------
+        n : int
+            Parameter, > 0.
+        p : float
+            Parameter, >= 0 and <=1.
+        size : int or tuple of ints, optional
+            Output shape. If the given shape is, e.g., ``(m, n, k)``, then
+            ``m * n * k`` samples are drawn.
+
+        Returns
+        -------
+        samples : int or ndarray of ints
+            Drawn samples.
+
+        Notes
+        -----
+        The probability density for the Negative Binomial distribution is
+
+        .. math:: P(N;n,p) = \\binom{N+n-1}{n-1}p^{n}(1-p)^{N},
+
+        where :math:`n-1` is the number of successes, :math:`p` is the probability
+        of success, and :math:`N+n-1` is the number of trials.
+
+        The negative binomial distribution gives the probability of n-1 successes
+        and N failures in N+n-1 trials, and success on the (N+n)th trial.
+
+        If one throws a die repeatedly until the third time a "1" appears, then the
+        probability distribution of the number of non-"1"s that appear before the
+        third "1" is a negative binomial distribution.
+
+        References
+        ----------
+        .. [1] Weisstein, Eric W. "Negative Binomial Distribution." From
+               MathWorld--A Wolfram Web Resource.
+               http://mathworld.wolfram.com/NegativeBinomialDistribution.html
+        .. [2] Wikipedia, "Negative binomial distribution",
+               http://en.wikipedia.org/wiki/Negative_binomial_distribution
+
+        Examples
+        --------
+        Draw samples from the distribution:
+
+        A real world example. A company drills wild-cat oil exploration wells, each
+        with an estimated probability of success of 0.1.  What is the probability
+        of having one success for each successive well, that is what is the
+        probability of a single success after drilling 5 wells, after 6 wells,
+        etc.?
+
+        >>> s = np.random.negative_binomial(1, 0.1, 100000)
+        >>> for i in range(1, 11):
+        ...    probability = sum(s<i) / 100000.
+        ...    print i, "wells drilled, probability of one success =", probability
 
         """
         cdef ndarray on
@@ -2618,7 +3410,48 @@ cdef class RandomState:
         """
         poisson(lam=1.0, size=None)
 
-        Poisson distribution.
+        Draw samples from a Poisson distribution.
+
+        The Poisson distribution is the limit of the Binomial
+        distribution for large N.
+
+        Parameters
+        ----------
+        lam : float
+            Expectation of interval, should be >= 0.
+        size : int or tuple of ints, optional
+            Output shape. If the given shape is, e.g., ``(m, n, k)``, then
+            ``m * n * k`` samples are drawn.
+
+        Notes
+        -----
+        The Poisson distribution
+
+        .. math:: f(k; \\lambda)=\\frac{\\lambda^k e^{-\\lambda}}{k!}
+
+        For events with an expected separation :math:`\\lambda` the Poisson
+        distribution :math:`f(k; \\lambda)` describes the probability of
+        :math:`k` events occurring within the observed interval :math:`\\lambda`.
+
+        References
+        ----------
+        .. [1] Weisstein, Eric W. "Poisson Distribution." From MathWorld--A Wolfram
+               Web Resource. http://mathworld.wolfram.com/PoissonDistribution.html
+        .. [2] Wikipedia, "Poisson distribution",
+               http://en.wikipedia.org/wiki/Poisson_distribution
+
+        Examples
+        --------
+        Draw samples from the distribution:
+
+        >>> import numpy as np
+        >>> s = np.random.poisson(5, 10000)
+
+        Display histogram of the sample:
+
+        >>> import matplotlib.pyplot as plt
+        >>> count, bins, ignored = plt.hist(s, 14, normed=True)
+        >>> plt.show()
 
         """
         cdef ndarray olam
@@ -2982,8 +3815,8 @@ cdef class RandomState:
 
         >>> def logseries(k, p):
         ...     return -p**k/(k*log(1-p))
-        >>> plt.plot(bins, logseries(bins, a)*count.max()/\\
-            logseries(bins, a).max(),'r')
+        >>> plt.plot(bins, logseries(bins, a)*count.max()/
+        ...          logseries(bins, a).max(), 'r')
         >>> plt.show()
 
         """
author	Pauli Virtanen <pav@iki.fi>	2009-10-02 19:37:07 +0000
committer	Pauli Virtanen <pav@iki.fi>	2009-10-02 19:37:07 +0000
commit	c6e430d844ce869ca419b8ab2fb568fa0d11f809 (patch)
tree	b81880d49dcd9535008a90794cd3e565deb27ccc /numpy/random
parent	094a81e1352fb6b3a7a614fc8df23d0080cf7cb5 (diff)
download	numpy-c6e430d844ce869ca419b8ab2fb568fa0d11f809.tar.gz