summaryrefslogtreecommitdiff
path: root/numpy/lib/io.py
diff options
context:
space:
mode:
Diffstat (limited to 'numpy/lib/io.py')
-rw-r--r--numpy/lib/io.py360
1 files changed, 360 insertions, 0 deletions
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
new file mode 100644
index 000000000..b4e89c2fc
--- /dev/null
+++ b/numpy/lib/io.py
@@ -0,0 +1,360 @@
+
+__all__ = ['savetxt', 'loadtxt',
+ 'load', 'loads',
+ 'save', 'savez',
+ 'packbits', 'unpackbits',
+ 'DataSource']
+
+import numpy as np
+import format
+import zipfile
+import cStringIO
+import tempfile
+import os
+
+from cPickle import load as _cload, loads
+from _datasource import DataSource
+from _compiled_base import packbits, unpackbits
+
+_file = file
+
class BagObj(object):
    """A simple class that converts attribute lookups to
    getitems on the object passed in.

    Example: ``BagObj(d).key`` is equivalent to ``d['key']``; a missing
    key surfaces as AttributeError so the object behaves like a plain
    attribute namespace.
    """
    def __init__(self, obj):
        # The wrapped mapping; all attribute access is redirected to it.
        self._obj = obj
    def __getattribute__(self, key):
        try:
            # Use object.__getattribute__ to reach '_obj' without
            # recursing through this override.
            return object.__getattribute__(self, '_obj')[key]
        except KeyError:
            # Exception-call syntax (not the old comma form) so the
            # class works under both Python 2 and 3.
            raise AttributeError(key)
+
class NpzFile(object):
    """A dictionary-like object with lazy-loading of files in the zipped
    archive provided on construction.

    The arrays and file strings are lazily loaded on either
    getitem access using obj['key'] or attribute lookup using obj.f.key

    A list of all files (without .npy extensions) can be obtained
    with .files and the ZipFile object itself using .zip
    """
    def __init__(self, fid):
        # fid: an open file-like object positioned at the zip data.
        _zip = zipfile.ZipFile(fid)
        self._files = _zip.namelist()   # raw member names, as stored
        self.files = []                 # member names with '.npy' stripped
        for x in self._files:
            if x.endswith('.npy'):
                self.files.append(x[:-4])
            else:
                self.files.append(x)
        self.zip = _zip
        # Attribute-style access: obj.f.key -> obj['key']
        self.f = BagObj(self)

    def __getitem__(self, key):
        # FIXME: This seems like it will copy strings around
        #   more than is strictly necessary.  The zipfile
        #   will read the string and then
        #   the format.read_array will copy the string
        #   to another place in memory.
        #   It would be better if the zipfile could read
        #   (or at least uncompress) the data
        #   directly into the array memory.
        # Accept either the raw member name or the '.npy'-stripped name.
        member = False
        if key in self._files:
            member = True
        elif key in self.files:
            member = True
            key += '.npy'
        if member:
            bytes = self.zip.read(key)
            if bytes.startswith(format.MAGIC_PREFIX):
                # .npy payload: deserialize into an ndarray.
                value = cStringIO.StringIO(bytes)
                return format.read_array(value)
            else:
                # Non-array member: hand back the raw bytes.
                return bytes
        else:
            # Exception-call syntax for Python 2/3 compatibility.
            raise KeyError("%s is not a file in the archive" % key)
+
def load(file, memmap=False):
    """Load a binary file.

    Read a binary file (either a pickle, or a binary .npy/.npz file) and
    return the result.

    Parameters
    ----------
    file : file-like object or string
        the file to read.  It must support seek and read methods
    memmap : bool
        If true, then memory-map the .npy file or unzip the .npz file into
        a temporary directory and memory-map each component.
        This has no effect for a pickle.  (Not yet implemented.)

    Returns
    -------
    result : array, tuple, dict, etc.
        data stored in the file.
        If file contains pickle data, then whatever is stored in the
        pickle is returned.
        If the file is a .npy file, then an array is returned.
        If the file is a .npz file, then a dictionary-like object is
        returned which has a filename:array key:value pair for every
        file in the zip.

    Raises
    ------
    IOError
        If the file cannot be interpreted as a zip, .npy, or pickle.
    """
    if isinstance(file, str):
        fid = _file(file, "rb")
    else:
        fid = file

    if memmap:
        raise NotImplementedError

    # Code to distinguish from NumPy binary files and pickles: peek at
    # the magic bytes, then rewind so the real reader sees them again.
    _ZIP_PREFIX = 'PK\x03\x04'
    N = len(format.MAGIC_PREFIX)
    magic = fid.read(N)
    fid.seek(-N, 1)  # back-up
    if magic.startswith(_ZIP_PREFIX):    # zip-file (assume .npz)
        return NpzFile(fid)
    elif magic == format.MAGIC_PREFIX:   # .npy file
        return format.read_array(fid)
    else:                                # Try a pickle
        try:
            return _cload(fid)
        except Exception:
            # Narrowed from a bare except so that KeyboardInterrupt and
            # SystemExit are not swallowed.
            raise IOError(
                "Failed to interpret file %s as a pickle" % repr(file))
+
def save(file, arr):
    """Save an array to a binary file (a string or file-like object).

    If the file is a string, then if it does not have the .npy extension,
    it is appended and a file opened.  A file opened here is closed
    before returning; a caller-supplied file object is left open.

    Data is saved to the open file in NumPy-array format.

    Examples
    --------
    import numpy as np
    ...
    np.save('myfile', a)
    a = np.load('myfile.npy')
    """
    if isinstance(file, str):
        if not file.endswith('.npy'):
            file = file + '.npy'
        fid = open(file, "wb")
        own_fid = True   # we opened it, so we must close it
    else:
        fid = file
        own_fid = False

    try:
        arr = np.asanyarray(arr)
        format.write_array(fid, arr)
    finally:
        # Fix a file-handle leak: close the file we opened even if
        # write_array raises.
        if own_fid:
            fid.close()
+
def savez(file, *args, **kwds):
    """Save several arrays into an .npz file format which is a
    zipped-archive of arrays.

    If keyword arguments are given, then filenames are taken from the
    keywords.  If arguments are passed in with no keywords, then stored
    file names are arr_0, arr_1, etc.

    Raises
    ------
    ValueError
        If a positional array's generated name collides with a keyword.
    """

    if isinstance(file, str):
        if not file.endswith('.npz'):
            file = file + '.npz'

    namedict = kwds
    for i, val in enumerate(args):
        key = 'arr_%d' % i
        if key in namedict:
            # Exception-call syntax for Python 2/3 compatibility.
            raise ValueError(
                "Cannot use un-named variables and keyword %s" % key)
        namedict[key] = val

    # 'zipf', not 'zip', to avoid shadowing the builtin.
    zipf = zipfile.ZipFile(file, mode="w")

    # Place to write temporary .npy files
    # before storing them in the zip
    direc = tempfile.gettempdir()
    todel = []

    try:
        for key, val in namedict.items():
            fname = key + '.npy'
            filename = os.path.join(direc, fname)
            todel.append(filename)
            fid = open(filename, 'wb')
            try:
                format.write_array(fid, np.asanyarray(val))
            finally:
                fid.close()
            zipf.write(filename, arcname=fname)
    finally:
        # Close the archive and remove temporaries even if an array
        # fails to serialize (the original leaked both on error).
        zipf.close()
        for name in todel:
            try:
                os.remove(name)
            except OSError:
                # Temp file may not exist if its open/write failed.
                pass
+
+# Adapted from matplotlib
+
+def _getconv(dtype):
+ typ = dtype.type
+ if issubclass(typ, np.bool_):
+ return lambda x: bool(int(x))
+ if issubclass(typ, np.integer):
+ return int
+ elif issubclass(typ, np.floating):
+ return float
+ elif issubclass(typ, np.complex):
+ return complex
+ else:
+ return str
+
+
+def _string_like(obj):
+ try: obj + ''
+ except (TypeError, ValueError): return 0
+ return 1
+
def loadtxt(fname, dtype=float, comments='#', delimiter=None, converters=None,
            skiprows=0, usecols=None, unpack=False):
    """
    Load ASCII data from fname into an array and return the array.

    The data must be regular: the same number of values in every row.

    fname can be a filename or a file handle.  Support for gzipped files
    is automatic, if the filename ends in .gz

    See scipy.io.loadmat to read and write matfiles.

    Example usage:

      X = loadtxt('test.dat')  # data in two columns
      t = X[:,0]
      y = X[:,1]

    Alternatively, you can do the same with "unpack"; see below

      X = loadtxt('test.dat')    # a matrix of data
      x = loadtxt('test.dat')    # a single column of data

    dtype - the data-type of the resulting array.  If this is a
      record data-type, the resulting array will be 1-d and each row will
      be interpreted as an element of the array.  The number of columns
      used must match the number of fields in the data-type in this case.

    comments - the character used to indicate the start of a comment
      in the file

    delimiter is a string-like character used to seperate values in the
      file.  If delimiter is unspecified or none, any whitespace string
      is a separator.

    converters, if not None, is a dictionary mapping column number to
      a function that will convert that column to a float.  Eg, if
      column 0 is a date string: converters={0:datestr2num}

    skiprows is the number of rows from the top to skip

    usecols, if not None, is a sequence of integer column indexes to
      extract where 0 is the first column, eg usecols=(1,4,5) to extract
      just the 2nd, 5th and 6th columns

    unpack, if True, will transpose the matrix allowing you to unpack
      into named arguments on the left hand side

      t,y = load('test.dat', unpack=True)  # for two column data
      x,y,z = load('somefile.dat', usecols=(3,5,7), unpack=True)
    """

    if _string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname)
        else:
            # open() instead of the Python-2-only file() builtin.
            fh = open(fname)
    elif hasattr(fname, 'seek'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')
    X = []

    dtype = np.dtype(dtype)
    defconv = _getconv(dtype)
    converterseq = None
    if converters is None:
        converters = {}
    if dtype.names is not None:
        # Record dtype: one converter per field, rows become tuples.
        converterseq = [_getconv(dtype.fields[name][0])
                        for name in dtype.names]

    for i, line in enumerate(fh):
        if i < skiprows:
            continue
        # Strip anything after the comment character.  Guard against
        # str.find returning -1 when the comment is absent: the old
        # line[:line.find(comments)] then chopped the last character,
        # silently corrupting a final line with no trailing newline.
        pos = line.find(comments)
        if pos >= 0:
            line = line[:pos]
        line = line.strip()
        if not len(line):
            continue
        vals = line.split(delimiter)
        if converterseq is None:
            # Default converters, one per column, decided on first row.
            converterseq = [converters.get(j, defconv)
                            for j in range(len(vals))]
        if usecols is not None:
            row = [converterseq[j](vals[j]) for j in usecols]
        else:
            row = [converterseq[j](val) for j, val in enumerate(vals)]
        if dtype.names is not None:
            row = tuple(row)
        X.append(row)

    X = np.array(X, dtype)
    if X.ndim == 2:
        # Collapse single-row/single-column results to 1-d.  Guarded by
        # ndim: record arrays are already 1-d, and the unconditional
        # 'r, c = X.shape' of the original raised ValueError for them.
        r, c = X.shape
        if r == 1 or c == 1:
            X.shape = max([r, c]),
    if unpack:
        return X.T
    else:
        return X
+
+
+# adjust so that fmt can change across columns if desired.
+
def savetxt(fname, X, fmt='%.18e', delimiter=' '):
    """
    Save the data in X to file fname using fmt string to convert the
    data to strings.

    fname can be a filename or a file handle.  If the filename ends in
    .gz, the file is automatically saved in compressed gzip format.  The
    load() command understands gzipped files transparently.  A file
    opened here is closed before returning; a caller-supplied handle is
    left open.

    Example usage:

      save('test.out', X)          # X is an array
      save('test1.out', (x,y,z))   # x,y,z equal sized 1D arrays
      save('test2.out', x)         # x is 1D
      save('test3.out', x, fmt='%1.4e')  # use exponential notation

    delimiter is used to separate the fields, eg delimiter ',' for
    comma-separated values.
    """

    own_fh = False
    if _string_like(fname):
        if fname.endswith('.gz'):
            import gzip
            fh = gzip.open(fname, 'wb')
        else:
            # open() instead of the Python-2-only file() builtin.
            fh = open(fname, 'w')
        own_fh = True
    elif hasattr(fname, 'seek'):
        fh = fname
    else:
        raise ValueError('fname must be a string or file handle')

    try:
        X = np.asarray(X)
        # Promote 1-d input to a single column WITHOUT mutating the
        # caller's array: np.asarray can return the input object itself,
        # and the original reshaped it in place (and left it reshaped if
        # a write failed midway).  reshape returns a view instead.
        if len(X.shape) == 1:
            X = X.reshape(len(X), 1)
        for row in X:
            fh.write(delimiter.join([fmt % val for val in row]) + '\n')
    finally:
        # Fix a file-handle leak: close the file we opened.
        if own_fh:
            fh.close()
+
+
+
+