summaryrefslogtreecommitdiff
path: root/numpy/lib/io.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/io.py')
-rw-r--r--numpy/lib/io.py339
1 files changed, 256 insertions, 83 deletions
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 3a962c7e1..b79286f30 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -57,8 +57,27 @@ def seek_gzip_factory(f):
return f
class BagObj(object):
- """A simple class that converts attribute lookups to
- getitems on the class passed in.
+ """
+ BagObj(obj)
+
+ Convert attribute lookups to getitems on the object passed in.
+
+ Parameters
+ ----------
+ obj : class instance
+ Object on which attribute lookup is performed.
+
+ Examples
+ --------
+ >>> class BagDemo(object):
+ ... def __getitem__(self, key):
+ ... return key
+ ...
+ >>> demo_obj = BagDemo()
+ >>> bagobj = np.lib.io.BagObj(demo_obj)
+ >>> bagobj.some_item
+ 'some_item'
+
"""
def __init__(self, obj):
self._obj = obj
@@ -69,14 +88,57 @@ class BagObj(object):
raise AttributeError, key
class NpzFile(object):
- """A dictionary-like object with lazy-loading of files in the zipped
+ """
+ NpzFile(fid)
+
+ A dictionary-like object with lazy-loading of files in the zipped
archive provided on construction.
+ `NpzFile` is used to load files in the NumPy ``.npz`` data archive
+ format. It assumes that files in the archive have a ".npy" extension;
+ other files are ignored.
+
The arrays and file strings are lazily loaded on either
- getitem access using obj['key'] or attribute lookup using obj.f.key
+ getitem access using ``obj['key']`` or attribute lookup using
+ ``obj.f.key``. A list of all files (without ".npy" extensions) can
+ be obtained with ``obj.files`` and the ZipFile object itself using
+ ``obj.zip``.
+
+ Attributes
+ ----------
+ files : list of str
+ List of all files in the archive with a ".npy" extension.
+ zip : ZipFile instance
+ The ZipFile object initialized with the zipped archive.
+ f : BagObj instance
+ An object on which attribute lookup can be performed as an alternative
+ to getitem access on the `NpzFile` instance itself.
+
+ Parameters
+ ----------
+ fid : file or str
+ The zipped archive to open. This is either a file-like object
+ or a string containing the path to the archive.
+
+ Examples
+ --------
+ >>> from tempfile import TemporaryFile
+ >>> outfile = TemporaryFile()
+ >>> x = np.arange(10)
+ >>> y = np.sin(x)
+ >>> np.savez(outfile, x=x, y=y)
+ >>> outfile.seek(0)
+
+ >>> npz = np.load(outfile)
+ >>> isinstance(npz, np.lib.io.NpzFile)
+ True
+ >>> npz.files
+ ['y', 'x']
+ >>> npz['x'] # getitem access
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+ >>> npz.f.x # attribute lookup
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
- A list of all files (without .npy) extensions can be obtained
- with .files and the ZipFile object itself using .zip
"""
def __init__(self, fid):
# Import is postponed to here since zipfile depends on gzip, an optional
@@ -123,16 +185,23 @@ class NpzFile(object):
return iter(self.files)
def items(self):
+ """
+ Return a list of tuples, with each tuple (filename, array in file).
+
+ """
return [(f, self[f]) for f in self.files]
def iteritems(self):
+ """Generator that returns tuples (filename, array in file)."""
for f in self.files:
yield (f, self[f])
def keys(self):
+ """Return files in the archive with a ".npy" extension."""
return self.files
def iterkeys(self):
+ """Return an iterator over the files in the archive."""
return self.__iter__()
def __contains__(self, key):
@@ -241,9 +310,13 @@ def save(file, arr):
See Also
--------
- savez : Save several arrays into a .npz compressed archive
+ savez : Save several arrays into a ``.npz`` compressed archive
savetxt, load
+ Notes
+ -----
+ For a description of the ``.npy`` format, see `format`.
+
Examples
--------
>>> from tempfile import TemporaryFile
@@ -269,7 +342,7 @@ def save(file, arr):
def savez(file, *args, **kwds):
"""
- Save several arrays into a single, compressed file with extension ".npz"
+ Save several arrays into a single, compressed file in ``.npz`` format.
If keyword arguments are given, the names for variables assigned to the
keywords are the keyword names (not the variable names in the caller).
@@ -278,33 +351,57 @@ def savez(file, *args, **kwds):
Parameters
----------
- file : Either the filename (string) or an open file (file-like object)
+ file : str or file
+ Either the file name (string) or an open file (file-like object).
If file is a string, it names the output file. ".npz" will be appended
- if it is not already there.
+ to the file name if it is not already there.
args : Arguments
Any function arguments other than the file name are variables to save.
- Since it is not possible for Python to know their names outside the
- savez function, they will be saved with names "arr_0", "arr_1", and so
- on. These arguments can be any expression.
+ Since it is not possible for Python to know their names outside
+ `savez`, they will be saved with names "arr_0", "arr_1", and so on.
+ These arguments can be any expression.
kwds : Keyword arguments
All keyword=value pairs cause the value to be saved with the name of
the keyword.
See Also
--------
- save : Save a single array to a binary file in NumPy format
- savetxt : Save an array to a file as plain text
+ save : Save a single array to a binary file in NumPy format.
+ savetxt : Save an array to a file as plain text.
Notes
-----
- The .npz file format is a zipped archive of files named after the variables
- they contain. Each file contains one variable in .npy format.
+ The ``.npz`` file format is a zipped archive of files named after the
+ variables they contain. Each file contains one variable in ``.npy``
+ format. For a description of the ``.npy`` format, see `format`.
Examples
--------
- >>> x = np.random.random((3, 3))
- >>> y = np.zeros((3, 2))
- >>> np.savez('data', x=x, y=y)
+ >>> from tempfile import TemporaryFile
+ >>> outfile = TemporaryFile()
+ >>> x = np.arange(10)
+ >>> y = np.sin(x)
+
+ Using `savez` with \\*args, the arrays are saved with default names.
+
+ >>> np.savez(outfile, x, y)
+ >>> outfile.seek(0) # only necessary in this example (with tempfile)
+ >>> npz = np.load(outfile)
+ >>> npz.files
+ ['arr_1', 'arr_0']
+ >>> npz['arr_0']
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
+
+ Using `savez` with \\*\\*kwds, the arrays are saved with the keyword names.
+
+ >>> outfile = TemporaryFile()
+ >>> np.savez(outfile, x=x, y=y)
+ >>> outfile.seek(0)
+ >>> npz = np.load(outfile)
+ >>> npz.files
+ ['y', 'x']
+ >>> npz['x']
+ array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])
"""
@@ -373,33 +470,33 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
Parameters
----------
- fname : file or string
+ fname : file or str
File or filename to read. If the filename extension is ``.gz`` or
``.bz2``, the file is first decompressed.
- dtype : data-type
+ dtype : dtype, optional
Data type of the resulting array. If this is a record data-type,
the resulting array will be 1-dimensional, and each row will be
interpreted as an element of the array. In this case, the number
of columns used must match the number of fields in the data-type.
- comments : string, optional
+ comments : str, optional
The character used to indicate the start of a comment.
- delimiter : string, optional
+ delimiter : str, optional
The string used to separate values. By default, this is any
whitespace.
- converters : {}
+ converters : dict, optional
A dictionary mapping column number to a function that will convert
that column to a float. E.g., if column 0 is a date string:
``converters = {0: datestr2num}``. Converters can also be used to
provide a default value for missing data:
``converters = {3: lambda s: float(s or 0)}``.
- skiprows : int
+ skiprows : int, optional
Skip the first `skiprows` lines.
- usecols : sequence
+ usecols : sequence, optional
Which columns to read, with 0 being the first. For example,
``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
- unpack : bool
+ unpack : bool, optional
If True, the returned array is transposed, so that arguments may be
- unpacked using ``x, y, z = loadtxt(...)``
+ unpacked using ``x, y, z = loadtxt(...)``. Default is False.
Returns
-------
@@ -408,6 +505,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
See Also
--------
+ load, fromstring, fromregex
scipy.io.loadmat : reads Matlab(R) data files
Examples
@@ -425,7 +523,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
dtype=[('gender', '|S1'), ('age', '<i4'), ('weight', '<f4')])
>>> c = StringIO("1,0,2\\n3,0,4")
- >>> x,y = np.loadtxt(c, delimiter=',', usecols=(0,2), unpack=True)
+ >>> x, y = np.loadtxt(c, delimiter=',', usecols=(0, 2), unpack=True)
>>> x
array([ 1., 3.])
>>> y
@@ -575,8 +673,8 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '):
See Also
--------
- save : Save an array to a binary file in NumPy format
- savez : Save several arrays into an .npz compressed archive
+ save : Save an array to a binary file in NumPy ``.npy`` format
+ savez : Save several arrays into a ``.npz`` compressed archive
Notes
-----
@@ -686,10 +784,11 @@ def savetxt(fname, X, fmt='%.18e',delimiter=' '):
import re
def fromregex(file, regexp, dtype):
"""
- Construct an array from a text file, using regular-expressions parsing.
+ Construct an array from a text file, using regular expression parsing.
- Array is constructed from all matches of the regular expression
- in the file. Groups in the regular expression are converted to fields.
+ The returned array is always a structured array, and is constructed from
+ all matches of the regular expression in the file. Groups in the regular
+ expression are converted to fields of the structured array.
Parameters
----------
@@ -698,18 +797,44 @@ def fromregex(file, regexp, dtype):
regexp : str or regexp
Regular expression used to parse the file.
Groups in the regular expression correspond to fields in the dtype.
- dtype : dtype or dtype list
- Dtype for the structured array
+ dtype : dtype or list of dtypes
+ Dtype for the structured array.
+
+ Returns
+ -------
+ output : ndarray
+ The output array, containing the part of the content of `file` that
+ was matched by `regexp`. `output` is always a structured array.
+
+ Raises
+ ------
+ TypeError
+ When `dtype` is not a valid dtype for a structured array.
+
+ See Also
+ --------
+ fromstring, loadtxt
+
+ Notes
+ -----
+ Dtypes for structured arrays can be specified in several forms, but all
+ forms specify at least the data type and field name. For details see
+ `doc.structured_arrays`.
Examples
--------
>>> f = open('test.dat', 'w')
>>> f.write("1312 foo\\n1534 bar\\n444 qux")
>>> f.close()
- >>> np.fromregex('test.dat', r"(\\d+)\\s+(...)",
- ... [('num', np.int64), ('key', 'S3')])
+
+ >>> regexp = r"(\\d+)\\s+(...)" # match [digits, whitespace, anything]
+ >>> output = np.fromregex('test.dat', regexp,
+ ... [('num', np.int64), ('key', 'S3')])
+ >>> output
array([(1312L, 'foo'), (1534L, 'bar'), (444L, 'qux')],
dtype=[('num', '<i8'), ('key', '|S3')])
+ >>> output['num']
+ array([1312, 1534, 444], dtype=int64)
"""
if not hasattr(file, "read"):
@@ -746,18 +871,18 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
names=None, excludelist=None, deletechars=None,
case_sensitive=True, unpack=None, usemask=False, loose=True):
"""
- Load data from a text file.
+ Load data from a text file, with missing values handled as specified.
- Each line past the first `skiprows` ones is split at the `delimiter`
+ Each line past the first `skiprows` lines is split at the `delimiter`
character, and characters following the `comments` character are discarded.
Parameters
----------
- fname : {file, string}
+ fname : file or str
File or filename to read. If the filename extension is `.gz` or
`.bz2`, the file is first decompressed.
- dtype : dtype
- Data type of the resulting array. If this is a flexible data-type,
+ dtype : dtype, optional
+ Data type of the resulting array. If this is a structured data type,
the resulting array will be 1-dimensional, and each row will be
interpreted as an element of the array. In this case, the number
of columns used must match the number of fields in the data-type,
@@ -765,56 +890,58 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
of the dtype.
If None, the dtypes will be determined by the contents of each
column, individually.
- comments : string, optional
+ comments : str, optional
The character used to indicate the start of a comment.
All the characters occurring on a line after a comment are discarded
- delimiter : string, optional
+ delimiter : str, int, or sequence, optional
The string used to separate values. By default, any consecutive
- whitespace act as delimiter.
+ whitespace characters act as the delimiter. An integer or sequence of integers
+ can also be provided as width(s) of each field.
skiprows : int, optional
Numbers of lines to skip at the beginning of the file.
- converters : {None, dictionary}, optional
+ converters : dict or None, optional
A dictionary mapping column number to a function that will convert
values in the column to a number. Converters can also be used to
provide a default value for missing data:
``converters = {3: lambda s: float(s or 0)}``.
- missing : string, optional
+ missing : str, optional
A string representing a missing value, irrespective of the column where
it appears (e.g., `'missing'` or `'unused'`).
- missing_values : {None, dictionary}, optional
+ missing_values : dict or None, optional
A dictionary mapping a column number to a string indicating whether the
corresponding field should be masked.
- usecols : {None, sequence}, optional
+ usecols : sequence or None, optional
Which columns to read, with 0 being the first. For example,
- ``usecols = (1,4,5)`` will extract the 2nd, 5th and 6th columns.
- names : {None, True, string, sequence}, optional
+ ``usecols = (1, 4, 5)`` will extract the 2nd, 5th and 6th columns.
+ names : {None, True, str, sequence}, optional
If `names` is True, the field names are read from the first valid line
after the first `skiprows` lines.
If `names` is a sequence or a single-string of comma-separated names,
- the names will be used to define the field names in a flexible dtype.
+ the names will be used to define the field names in a structured dtype.
If `names` is None, the names of the dtype fields will be used, if any.
excludelist : sequence, optional
A list of names to exclude. This list is appended to the default list
['return','file','print']. Excluded names are appended an underscore:
for example, `file` would become `file_`.
- deletechars : string, optional
+ deletechars : str, optional
A string combining invalid characters that must be deleted from the
names.
case_sensitive : {True, False, 'upper', 'lower'}, optional
- If True, field names are case_sensitive.
+ If True, field names are case sensitive.
If False or 'upper', field names are converted to upper case.
If 'lower', field names are converted to lower case.
unpack : bool, optional
If True, the returned array is transposed, so that arguments may be
unpacked using ``x, y, z = loadtxt(...)``
usemask : bool, optional
- If True, returns a masked array.
- If False, return a regular standard array.
+ If True, return a masked array.
+ If False, return a regular array.
Returns
-------
- out : MaskedArray
- Data read from the text file.
+ out : ndarray
+ Data read from the text file. If `usemask` is True, this is a
+ masked array.
See Also
--------
@@ -824,12 +951,53 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
-----
* When spaces are used as delimiters, or when no delimiter has been given
as input, there should not be any missing data between two fields.
- * When the variable are named (either by a flexible dtype or with `names`,
- there must not be any header in the file (else a :exc:ValueError
+ * When the variables are named (either by a flexible dtype or with `names`),
+ there must not be any header in the file (else a ValueError
exception is raised).
* Individual values are not stripped of spaces by default.
When using a custom converter, make sure the function does remove spaces.
+ Examples
+ --------
+ >>> from StringIO import StringIO
+ >>> import numpy as np
+
+ Comma delimited file with mixed dtype
+
+ >>> s = StringIO("1,1.3,abcde")
+ >>> data = np.genfromtxt(s, dtype=[('myint','i8'),('myfloat','f8'),
+ ... ('mystring','S5')], delimiter=",")
+ >>> data
+ array((1, 1.3, 'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+ Using dtype = None
+
+ >>> s.seek(0) # needed for StringIO example only
+ >>> data = np.genfromtxt(s, dtype=None,
+ ... names = ['myint','myfloat','mystring'], delimiter=",")
+ >>> data
+ array((1, 1.3, 'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+ Specifying dtype and names
+
+ >>> s.seek(0)
+ >>> data = np.genfromtxt(s, dtype="i8,f8,S5",
+ ... names=['myint','myfloat','mystring'], delimiter=",")
+ >>> data
+ array((1, 1.3, 'abcde'),
+ dtype=[('myint', '<i8'), ('myfloat', '<f8'), ('mystring', '|S5')])
+
+ An example with fixed-width columns
+
+ >>> s = StringIO("11.3abcde")
+ >>> data = np.genfromtxt(s, dtype=None, names=['intvar','fltvar','strvar'],
+ ... delimiter=[1,3,5])
+ >>> data
+ array((1, 1.3, 'abcde'),
+ dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
+
"""
#
if usemask:
@@ -1114,15 +1282,15 @@ def ndfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
usecols=None, unpack=None, names=None,
excludelist=None, deletechars=None, case_sensitive=True,):
"""
- Load ASCII data stored in fname and returns a ndarray.
-
+ Load ASCII data stored in a file and return it as a single array.
+
Complete description of all the optional input parameters is available in
the docstring of the `genfromtxt` function.
-
+
See Also
--------
numpy.genfromtxt : generic function.
-
+
"""
kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
skiprows=skiprows, converters=converters,
@@ -1137,14 +1305,14 @@ def mafromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
usecols=None, unpack=None, names=None,
excludelist=None, deletechars=None, case_sensitive=True,):
"""
- Load ASCII data stored in fname and returns a MaskedArray.
-
- Complete description of all the optional input parameters is available in
- the docstring of the `genfromtxt` function.
-
+ Load ASCII data stored in a text file and return a masked array.
+
+ For a complete description of all the input parameters, see `genfromtxt`.
+
See Also
--------
- numpy.genfromtxt : generic function.
+ numpy.genfromtxt : generic function to load ASCII data.
+
"""
kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
skiprows=skiprows, converters=converters,
@@ -1162,8 +1330,10 @@ def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
excludelist=None, deletechars=None, case_sensitive=True,
usemask=False):
"""
- Load ASCII data stored in fname and returns a standard recarray (if
- `usemask=False`) or a MaskedRecords (if `usemask=True`).
+ Load ASCII data from a file and return it in a record array.
+
+ If ``usemask=False`` a standard `recarray` is returned,
+ if ``usemask=True`` a MaskedRecords array is returned.
Complete description of all the optional input parameters is available in
the docstring of the `genfromtxt` function.
@@ -1174,8 +1344,8 @@ def recfromtxt(fname, dtype=None, comments='#', delimiter=None, skiprows=0,
Notes
-----
- * by default, `dtype=None`, which means that the dtype of the output array
- will be determined from the data.
+ By default, `dtype` is None, which means that the data-type of the output
+ array will be determined from the data.
"""
kwargs = dict(dtype=dtype, comments=comments, delimiter=delimiter,
@@ -1199,15 +1369,18 @@ def recfromcsv(fname, dtype=None, comments='#', skiprows=0,
excludelist=None, deletechars=None, case_sensitive='lower',
usemask=False):
"""
- Load ASCII data stored in comma-separated file and returns a recarray (if
- `usemask=False`) or a MaskedRecords (if `usemask=True`).
-
- Complete description of all the optional input parameters is available in
- the docstring of the `genfromtxt` function.
-
+ Load ASCII data stored in a comma-separated file.
+
+ The returned array is a record array (if ``usemask=False``, see
+ `recarray`) or a masked record array (if ``usemask=True``,
+ see `ma.mrecords.MaskedRecords`).
+
+ For a complete description of all the input parameters, see `genfromtxt`.
+
See Also
--------
- numpy.genfromtxt : generic function
+ numpy.genfromtxt : generic function to load ASCII data.
+
"""
kwargs = dict(dtype=dtype, comments=comments, delimiter=",",
skiprows=skiprows, converters=converters,