1 files changed, 256 insertions, 83 deletions
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 3a962c7e1..b79286f30 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -57,8 +57,27 @@ def seek_gzip_factory(f):
     return f
 
 class BagObj(object):
-    """A simple class that converts attribute lookups to
-    getitems on the class passed in.
+    """
+    BagObj(obj)
+
+    Convert attribute lookups to getitems on the object passed in.
+
+    Parameters
+    ----------
+    obj : class instance
+        Object on which attribute lookup is performed.
+
+    Examples
+    --------
+    >>> class BagDemo(object):
+    ...     def __getitem__(self, key):
+    ...         return key
+    ...
+    >>> demo_obj = BagDemo()
+    >>> bagobj = np.lib.io.BagObj(demo_obj)
+    >>> bagobj.some_item
+    'some_item'
+
     """
     def __init__(self, obj):
         self._obj = obj
@@ -69,14 +88,57 @@ class BagObj(object):
             raise AttributeError, key
 
 class NpzFile(object):
-    """A dictionary-like object with lazy-loading of files in the zipped
+    """
+    NpzFile(fid)
+
+    A dictionary-like object with lazy-loading of files in the zipped
     archive provided on construction.
 
+    `NpzFile` is used to load files in the NumPy ``.npz`` data archive
+    format. It assumes that files in the archive have a ".npy" extension;
+    other files are ignored.
+
     The arrays and file strings are lazily loaded on either
-    getitem access using obj['key'] or attribute lookup using obj.f.key
+    getitem access using ``obj['key']`` or attribute lookup using
+    ``obj.f.key``. A list of all files (without ".npy" extensions) can
+    be obtained with ``obj.files`` and the ZipFile object itself using
+    ``obj.zip``.
+
+    Attributes
+    ----------
+    files : list of str
+        List of all files in the archive with a ".npy" extension.
+    zip : ZipFile instance
+        The ZipFile object initialized with the zipped archive.
+    f : BagObj instance
+        An object on which attribute lookup can be performed as an alternative
+        to getitem access on the `NpzFile` instance itself.
+
+    Parameters
+    ----------
+    fid : file or str
+        The zipped archive to open. This is either a file-like object
+        or a string containing the path to the archive.
+
+    Examples
+    --------
+    >>> from tempfile import TemporaryFile
+    >>> outfile = TemporaryFile()
+    >>> x = np.arange(10)
+    >>> y = np.sin(x)
+    >>> np.savez(outfile, x=x, y=y)
+    >>> outfile.seek(0)
+
+    >>> npz = np.load(outfile)
+    >>> isinstance(npz, np.lib.io.NpzFile)
+    True
+    >>> npz.files
+    ['y', 'x']
+    >>> npz['x']  # getitem access
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+    >>> npz.f.x  # attribute lookup
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
 
-    A list of all files (without .npy) extensions can be obtained
-    with .files and the ZipFile object itself using .zip
     """
     def __init__(self, fid):
         # Import is postponed to here since zipfile depends on gzip, an optional
@@ -123,16 +185,23 @@ class NpzFile(object):
         return iter(self.files)
 
     def items(self):
+        """
+        Return a list of tuples, with each tuple (filename, array in file).
+
+        """
         return [(f, self[f]) for f in self.files]
 
     def iteritems(self):
+        """Generator that returns tuples (filename, array in file)."""
         for f in self.files:
             yield (f, self[f])
 
     def keys(self):
+        """Return files in the archive with a ".npy" extension."""
         return self.files
 
     def iterkeys(self):
+        """Return an iterator over the files in the archive."""
         return self.__iter__()
 
     def __contains__(self, key):
@@ -241,9 +310,13 @@ def save(file, arr):
 
     See Also
     --------
-    savez : Save several arrays into a .npz compressed archive
+    savez : Save several arrays into a ``.npz`` compressed archive
     savetxt, load
 
+    Notes
+    -----
+    For a description of the ``.npy`` format, see `format`.
+
     Examples
     --------
     >>> from tempfile import TemporaryFile
@@ -269,7 +342,7 @@ def save(file, arr):
 
 def savez(file, *args, **kwds):
     """
-    Save several arrays into a single, compressed file with extension ".npz"
+    Save several arrays into a single, compressed file in ``.npz`` format.
 
     If keyword arguments are given, the names for variables assigned to the
     keywords are the keyword names (not the variable names in the caller).
@@ -278,33 +351,57 @@ def savez(file, *args, **kwds):
 
     Parameters
     ----------
-    file : Either the filename (string) or an open file (file-like object)
+    file : str or file
+        Either the file name (string) or an open file (file-like object)
         If file is a string, it names the output file.  ".npz" will be appended
-        if it is not already there.
+        to the file name if it is not already there.
     args : Arguments
         Any function arguments other than the file name are variables to save.
-        Since it is not possible for Python to know their names outside the
-        savez function, they will be saved with names "arr_0", "arr_1", and so
-        on.  These arguments can be any expression.
+        Since it is not possible for Python to know their names outside
+        `savez`, they will be saved with names "arr_0", "arr_1", and so on.
+        These arguments can be any expression.
     kwds : Keyword arguments
         All keyword=value pairs cause the value to be saved with the name of
         the keyword.
 
     See Also
     --------
-    save : Save a single array to a binary file in NumPy format
-    savetxt : Save an array to a file as plain text
+    save : Save a single array to a binary file in NumPy format.
+    savetxt : Save an array to a file as plain text.
 
     Notes
     -----
-    The .npz file format is a zipped archive of files named after the variables
-    they contain.  Each file contains one variable in .npy format.
+    The ``.npz`` file format is a zipped archive of files named after the
+    variables they contain.  Each file contains one variable in ``.npy``
+    format. For a description of the ``.npy`` format, see `format`.
 
     Examples
     --------
-    >>> x = np.random.random((3, 3))
-    >>> y = np.zeros((3, 2))
-    >>> np.savez('data', x=x, y=y)
+    >>> from tempfile import TemporaryFile
+    >>> outfile = TemporaryFile()
+    >>> x = np.arange(10)
+    >>> y = np.sin(x)
+
+    Using `savez` with \\*args, the arrays are saved with default names.
+
+    >>> np.savez(outfile, x, y)
+    >>> outfile.seek(0)  # only necessary in this example (with tempfile)
+    >>> npz = np.load(outfile)
+    >>> npz.files
+    ['arr_1', 'arr_0']
+    >>> npz['arr_0']
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+    Using `savez` with \\*\\*kwds, the arrays are saved with the keyword names.
+
+    >>> outfile = TemporaryFile()
+    >>> np.savez(outfile, x=x, y=y)
+    >>> outfile.seek(0)
+    >>> npz = np.load(outfile)
+    >>> npz.files
+    ['y', 'x']
+    >>> npz['x']
+    array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
 
     """
 
@@ -373,33 +470,33 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
 
     Parameters
     ----------
-    fname : file or string
+    fname : file or str
         File or filename to read.  If the filename extension is ``.gz`` or
         ``.bz2``, the file is first decompressed.
-    dtype : data-type
+    dtype : dtype, optional
         Data type of the resulting array.  If this is a record data-type,
         the resulting array will be 1-dimensional, and each row will be
         interpreted as an element of the array.   In this case, the number
         of columns used must match the number of fields in the data-type.
-    comments : string, optional
+    comments : str, optional
         The character used to indicate the start of a comment.
-    delimiter : string, optional
+    delimiter : str, optional
         The string used to separate values.  By default, this is any
         whitespace.
-    converters : {}
+    converters : dict, optional
         A dictionary mapping column number to a function that will convert
         that column to a float.  E.g., if column 0 is a date string:
         ``converters = {0: datestr2num}``. Converters can also be used to
         provide a default value for missing data:
         ``converters = {3: lambda s: float(s or 0)}``.
-    skiprows : int
+    skiprows : int, optional
         Skip the first `skiprows` lines.
-    usecols : sequence
+    usecols : sequence, optional
         Which columns to read, with 0 being the first.  For example,
         ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
-    unpack : bool
+    unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
-        unpacked using ``x, y, z = loadtxt(...)``
+        unpacked using ``x, y, z = loadtxt(...)``. Default is False.
 
     Returns
     -------
@@ -408,6 +505,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
 
     See Also
     --------
+    load, fromstring, fromregex
     scipy.io.loadmat : reads Matlab(R) data files
 
     Examples
@@ -425,7 +523,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
           dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
 
     >>> c = StringIO("1,0,2\\n3,0,4")
-    >>> x,y = np.loadtxt(c, delimiter=',', usecols=(0,2), unpack=True)
+    >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
     >>> x
     array([ 1.,  3.])
     >>> y
@@ -575,8 +673,8 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '):
 
     See Also
     --------
-    save : Save an array to a binary file in NumPy format
-    savez : Save several arrays into an .npz compressed archive
+    save : Save an array to a binary file in NumPy ``.npy`` format
+    savez : Save several arrays into a ``.npz`` compressed archive
 
     Notes
     -----
@@ -686,10 +784,11 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '):
 import re
 def fromregex(file, regexp, dtype):
     """
-    Construct an array from a text file, using regular-expressions parsing.
+    Construct an array from a text file, using regular expression parsing.
 
-    Array is constructed from all matches of the regular expression
-    in the file. Groups in the regular expression are converted to fields.
+    The returned array is always a structured array, and is constructed from
+    all matches of the regular expression in the file. Groups in the regular
+    expression are converted to fields of the structured array.
 
     Parameters
     ----------
@@ -698,18 +797,44 @@ def fromregex(file, regexp, dtype):
     regexp : str or regexp
         Regular expression used to parse the file.
         Groups in the regular expression correspond to fields in the dtype.
-    dtype : dtype or dtype list
-        Dtype for the structured array
+    dtype : dtype or list of dtypes
+        Dtype for the structured array.
+
+    Returns
+    -------
+    output : ndarray
+        The output array, containing the part of the content of `file` that
+        was matched by `regexp`. `output` is always a structured array.
+
+    Raises
+    ------
+    TypeError
+        When `dtype` is not a valid dtype for a structured array.
+
+    See Also
+    --------
+    fromstring, loadtxt
+
+    Notes
+    -----
+    Dtypes for structured arrays can be specified in several forms, but all
+    forms specify at least the data type and field name. For details see
+    `doc.structured_arrays`.
 
     Examples
     --------
     >>> f = open('test.dat', 'w')
     >>> f.write("1312 foo\\n1534  bar\\n444   qux")
     >>> f.close()
-    >>> np.fromregex('test.dat', r"(\\d+)\\s+(...)",
-    ...              [('num', np.int64), ('key', 'S3')])
+
+    >>> regexp = r"(\\d+)\\s+(...)"  # match [digits, whitespace, anything]
+    >>> output = np.fromregex('test.dat', regexp,
+    ...                       [('num', np.int64), ('key', 'S3')])
+    >>> output
     array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
           dtype=[('num', '<i8'), ('key', '|S3')])
+    >>> output['num']
+    array([1312, 1534,  444], dtype=int64)
 
     """
     if not hasattr(file, "read"):
@@ -746,18 +871,18 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
                names=None, excludelist=None, deletechars=None,
                case_sensitive=True, unpack=None, usemask=False, loose=True):
     """
-    Load data from a text file.
+    Load data from a text file, with missing values handled as specified.
 
-    Each line past the first `skiprows` ones is split at the `delimiter`
+    Each line past the first `skiprows` lines is split at the `delimiter`
     character, and characters following the `comments` character are discarded.
 
     Parameters
     ----------
-    fname : {file, string}
+    fname : file or str
         File or filename to read.  If the filename extension is `.gz` or
         `.bz2`, the file is first decompressed.
-    dtype : dtype
-        Data type of the resulting array.  If this is a flexible data-type,
+    dtype : dtype, optional
+        Data type of the resulting array.  If this is a structured data type,
         the resulting array will be 1-dimensional, and each row will be
         interpreted as an element of the array. In this case, the number
         of columns used must match the number of fields in the data-type,
@@ -765,56 +890,58 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
         of the dtype.
         If None, the dtypes will be determined by the contents of each
         column, individually.
-    comments : string, optional
+    comments : str, optional
         The character used to indicate the start of a comment.
         All the characters occurring on a line after a comment are discarded
-    delimiter : string, optional
+    delimiter : str, int, or sequence, optional
         The string used to separate values.  By default, any consecutive
-        whitespace act as delimiter.
+        whitespace acts as the delimiter.  An integer or sequence of integers
+        can also be provided as width(s) of each field.
     skiprows : int, optional
         Numbers of lines to skip at the beginning of the file.
-    converters : {None, dictionary}, optional
+    converters : dict or None, optional
         A dictionary mapping column number to a function that will convert
         values in the column to a number. Converters can also be used to
         provide a default value for missing data:
         ``converters = {3: lambda s: float(s or 0)}``.
-    missing : string, optional
+    missing : str, optional
         A string representing a missing value, irrespective of the column where
         it appears (e.g., `'missing'` or `'unused'`).
-    missing_values : {None, dictionary}, optional
+    missing_values : dict or None, optional
         A dictionary mapping a column number to a string indicating whether the
         corresponding field should be masked.
-    usecols : {None, sequence}, optional
+    usecols : sequence or None, optional
         Which columns to read, with 0 being the first.  For example,
-        ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
-    names : {None, True, string, sequence}, optional
+        ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
+    names : {None, True, str, sequence}, optional
         If `names` is True, the field names are read from the first valid line
         after the first `skiprows` lines.
         If `names` is a sequence or a single-string of comma-separated names,
-        the names will be used to define the field names in a flexible dtype.
+        the names will be used to define the field names in a structured dtype.
         If `names` is None, the names of the dtype fields will be used, if any.
     excludelist : sequence, optional
         A list of names to exclude. This list is appended to the default list
         ['return','file','print']. Excluded names are appended an underscore:
         for example, `file` would become `file_`.
-    deletechars : string, optional
+    deletechars : str, optional
         A string combining invalid characters that must be deleted from the
         names.
     case_sensitive : {True, False, 'upper', 'lower'}, optional
-        If True, field names are case_sensitive.
+        If True, field names are case sensitive.
         If False or 'upper', field names are converted to upper case.
         If 'lower', field names are converted to lower case.
     unpack : bool, optional
         If True, the returned array is transposed, so that arguments may be
         unpacked using ``x, y, z = loadtxt(...)``
     usemask : bool, optional
-        If True, returns a masked array.
-        If False, return a regular standard array.
+        If True, return a masked array.
+        If False, return a regular array.
 
     Returns
     -------
-    out : MaskedArray
-        Data read from the text file.
+    out : ndarray
+        Data read from the text file. If `usemask` is True, this is a
+        masked array.
 
     See Also
     --------
@@ -824,12 +951,53 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
     -----
     * When spaces are used as delimiters, or when no delimiter has been given
       as input, there should not be any missing data between two fields.
-    * When the variable are named (either by a flexible dtype or with `names`,
-      there must not be any header in the file (else a :exc:ValueError
+    * When the variables are named (either by a structured dtype or with
+      `names`), there must not be any header in the file (else a ValueError
       exception is raised).
     * Individual values are not stripped of spaces by default.
       When using a custom converter, make sure the function does remove spaces.
 
+    Examples
+    --------
+    >>> from StringIO import StringIO
+    >>> import numpy as np
+
+    Comma delimited file with mixed dtype
+
+    >>> s = StringIO("1,1.3,abcde")
+    >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
+    ...     ('mystring','S5')], delimiter=",")
+    >>> data
+    array((1, 1.3, 'abcde'),
+          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+    Using dtype = None
+
+    >>> s.seek(0) # needed for StringIO example only
+    >>> data = np.genfromtxt(s, dtype=None,
+    ...     names = ['myint','myfloat','mystring'], delimiter=",")
+    >>> data
+    array((1, 1.3, 'abcde'),
+          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+    Specifying dtype and names
+
+    >>> s.seek(0)
+    >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
+    ...     names=['myint','myfloat','mystring'], delimiter=",")
+    >>> data
+    array((1, 1.3, 'abcde'),
+          dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+    An example with fixed-width columns
+
+    >>> s = StringIO("11.3abcde")
+    >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
+    ...         delimiter=[1,3,5])
+    >>> data
+    array((1, 1.3, 'abcde'),
+          dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
+
     """
     #
     if usemask:
@@ -1114,15 +1282,15 @@ def ndfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
              usecols=None, unpack=None, names=None,
              excludelist=None, deletechars=None, case_sensitive=True,):
     """
-    Load ASCII data stored in fname and returns a ndarray.
-    
+    Load ASCII data stored in a file and return it as a single array.
+
     Complete description of all the optional input parameters is available in
     the docstring of the `genfromtxt` function.
-    
+
     See Also
     --------
     numpy.genfromtxt : generic function.
-    
+
     """
     kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
                   skiprows=skiprows, converters=converters,
@@ -1137,14 +1305,14 @@ def mafromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
               usecols=None, unpack=None, names=None,
               excludelist=None, deletechars=None, case_sensitive=True,):
     """
-    Load ASCII data stored in fname and returns a MaskedArray.
-    
-    Complete description of all the optional input parameters is available in
-    the docstring of the `genfromtxt` function.
-    
+    Load ASCII data stored in a text file and return a masked array.
+
+    For a complete description of all the input parameters, see `genfromtxt`.
+
     See Also
     --------
-    numpy.genfromtxt : generic function.
+    numpy.genfromtxt : generic function to load ASCII data.
+
     """
     kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
                   skiprows=skiprows, converters=converters,
@@ -1162,8 +1330,10 @@ def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
                excludelist=None, deletechars=None, case_sensitive=True,
                usemask=False):
     """
-    Load ASCII data stored in fname and returns a standard recarray (if
-    `usemask=False`) or a MaskedRecords (if `usemask=True`).
+    Load ASCII data from a file and return it in a record array.
+
+    If ``usemask=False`` a standard `recarray` is returned,
+    if ``usemask=True`` a MaskedRecords array is returned.
 
     Complete description of all the optional input parameters is available in
     the docstring of the `genfromtxt` function.
@@ -1174,8 +1344,8 @@ def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
 
     Notes
     -----
-    * by default, `dtype=None`, which means that the dtype of the output array
-      will be determined from the data.
+    By default, `dtype` is None, which means that the data-type of the output
+    array will be determined from the data.
 
     """
     kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter, 
@@ -1199,15 +1369,18 @@ def recfromcsv(fname, dtype=None, comments='#', skiprows=0,
                excludelist=None, deletechars=None, case_sensitive='lower',
                usemask=False):
     """
-    Load ASCII data stored in comma-separated file and returns a recarray (if 
-    `usemask=False`) or a MaskedRecords (if `usemask=True`).
-    
-    Complete description of all the optional input parameters is available in
-    the docstring of the `genfromtxt` function.
-    
+    Load ASCII data stored in a comma-separated file.
+
+    The returned array is a record array (if ``usemask=False``, see
+    `recarray`) or a masked record array (if ``usemask=True``,
+    see `ma.mrecords.MaskedRecords`).
+
+    For a complete description of all the input parameters, see `genfromtxt`.
+
     See Also
     --------
-    numpy.genfromtxt : generic function
+    numpy.genfromtxt : generic function to load ASCII data.
+
     """
     kwargs = dict(dtype=dtype, comments=comments, delimiter=",", 
                   skiprows=skiprows, converters=converters,