diff options
author | pierregm <pierregmcode@gmail.com> | 2010-10-11 23:02:10 +0200 |
---|---|---|
committer | pierregm <pierregmcode@gmail.com> | 2010-10-11 23:02:10 +0200 |
commit | a14dd542532d383610c1b01c5698b137dd058fea (patch) | |
tree | 036f0452ee16fe1b9b74c13e6ad9bb9155310256 /numpy/lib/npyio.py | |
parent | 61d945bdb5c9b2b3329e1b8468b5c7d0596dd9fc (diff) | |
parent | 11ee694744f2552d77652ed929fdc2b4ccca6843 (diff) | |
download | numpy-a14dd542532d383610c1b01c5698b137dd058fea.tar.gz |
merging refs/remotes/origin/master into HEAD
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r-- | numpy/lib/npyio.py | 251 |
1 files changed, 171 insertions, 80 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 8c4a1a823..2668357bc 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1,6 +1,6 @@ __all__ = ['savetxt', 'loadtxt', 'genfromtxt', 'ndfromtxt', 'mafromtxt', - 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', - 'packbits', 'unpackbits', 'fromregex', 'DataSource'] + 'recfromtxt', 'recfromcsv', 'load', 'loads', 'save', 'savez', + 'savez_compressed', 'packbits', 'unpackbits', 'fromregex', 'DataSource'] import numpy as np import format @@ -108,7 +108,11 @@ class BagObj(object): except KeyError: raise AttributeError, key - +def zipfile_factory(*args, **kwargs): + import zipfile + if sys.version_info >= (2, 5): + kwargs['allowZip64'] = True + return zipfile.ZipFile(*args, **kwargs) class NpzFile(object): """ @@ -142,6 +146,9 @@ class NpzFile(object): fid : file or str The zipped archive to open. This is either a file-like object or a string containing the path to the archive. + own_fid : bool, optional + Whether NpzFile should close the file handle. + Requires that `fid` is a file-like object. Examples -------- @@ -163,11 +170,10 @@ class NpzFile(object): array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) """ - def __init__(self, fid): + def __init__(self, fid, own_fid=False): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. - import zipfile - _zip = zipfile.ZipFile(fid) + _zip = zipfile_factory(fid) self._files = _zip.namelist() self.files = [] for x in self._files: @@ -177,6 +183,25 @@ class NpzFile(object): self.files.append(x) self.zip = _zip self.f = BagObj(self) + if own_fid: + self.fid = fid + else: + self.fid = None + + def close(self): + """ + Close the file. + + """ + if self.zip is not None: + self.zip.close() + self.zip = None + if self.fid is not None: + self.fid.close() + self.fid = None + + def __del__(self): + self.close() def __getitem__(self, key): # FIXME: This seems like it will copy strings around @@ -293,31 +318,39 @@ def load(file, mmap_mode=None): """ import gzip + own_fid = False if isinstance(file, basestring): fid = open(file, "rb") + own_fid = True elif isinstance(file, gzip.GzipFile): fid = seek_gzip_factory(file) + own_fid = True else: fid = file - # Code to distinguish from NumPy binary files and pickles. - _ZIP_PREFIX = asbytes('PK\x03\x04') - N = len(format.MAGIC_PREFIX) - magic = fid.read(N) - fid.seek(-N, 1) # back-up - if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz) - return NpzFile(fid) - elif magic == format.MAGIC_PREFIX: # .npy file - if mmap_mode: - return format.open_memmap(file, mode=mmap_mode) - else: - return format.read_array(fid) - else: # Try a pickle - try: - return _cload(fid) - except: - raise IOError, \ - "Failed to interpret file %s as a pickle" % repr(file) + try: + # Code to distinguish from NumPy binary files and pickles. + _ZIP_PREFIX = asbytes('PK\x03\x04') + N = len(format.MAGIC_PREFIX) + magic = fid.read(N) + fid.seek(-N, 1) # back-up + if magic.startswith(_ZIP_PREFIX): # zip-file (assume .npz) + own_fid = False + return NpzFile(fid, own_fid=True) + elif magic == format.MAGIC_PREFIX: # .npy file + if mmap_mode: + return format.open_memmap(file, mode=mmap_mode) + else: + return format.read_array(fid) + else: # Try a pickle + try: + return _cload(fid) + except: + raise IOError, \ + "Failed to interpret file %s as a pickle" % repr(file) + finally: + if own_fid: + fid.close() def save(file, arr): """ @@ -335,7 +368,7 @@ def save(file, arr): See Also -------- - savez : Save several arrays into a ``.npz`` compressed archive + savez : Save several arrays into a ``.npz`` archive savetxt, load Notes @@ -355,19 +388,25 @@ def save(file, arr): array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) """ + own_fid = False if isinstance(file, basestring): if not file.endswith('.npy'): file = file + '.npy' fid = open(file, "wb") + own_fid = True else: fid = file - arr = np.asanyarray(arr) - format.write_array(fid, arr) + try: + arr = np.asanyarray(arr) + format.write_array(fid, arr) + finally: + if own_fid: + fid.close() def savez(file, *args, **kwds): """ - Save several arrays into a single, archive file in ``.npz`` format. + Save several arrays into a single file in uncompressed ``.npz`` format. If arguments are passed in with no keywords, the corresponding variable names, in the .npz file, are 'arr_0', 'arr_1', etc. If keyword arguments @@ -380,12 +419,12 @@ def savez(file, *args, **kwds): Either the file name (string) or an open file (file-like object) where the data will be saved. If file is a string, the ``.npz`` extension will be appended to the file name if it is not already there. - \\*args : Arguments, optional + *args : Arguments, optional Arrays to save to the file. Since it is not possible for Python to know the names of the arrays outside `savez`, the arrays will be saved with names "arr_0", "arr_1", and so on. These arguments can be any expression. - \\*\\*kwds : Keyword arguments, optional + **kwds : Keyword arguments, optional Arrays to save to the file. Arrays will be saved in the file with the keyword names. @@ -417,7 +456,7 @@ def savez(file, *args, **kwds): >>> x = np.arange(10) >>> y = np.sin(x) - Using `savez` with \\*args, the arrays are saved with default names. + Using `savez` with *args, the arrays are saved with default names. >>> np.savez(outfile, x, y) >>> outfile.seek(0) # Only needed here to simulate closing & reopening file @@ -427,7 +466,7 @@ def savez(file, *args, **kwds): >>> npzfile['arr_0'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) - Using `savez` with \\*\\*kwds, the arrays are saved with the keyword names. + Using `savez` with **kwds, the arrays are saved with the keyword names. >>> outfile = TemporaryFile() >>> np.savez(outfile, x=x, y=y) @@ -438,8 +477,38 @@ def savez(file, *args, **kwds): >>> npzfile['x'] array([0, 1, 2, 3, 4, 5, 6, 7, 8, 9]) + See Also + -------- + numpy.savez_compressed : Save several arrays into a compressed .npz file format + + """ + _savez(file, args, kwds, False) + +def savez_compressed(file, *args, **kwds): """ + Save several arrays into a single file in compressed ``.npz`` format. + + If keyword arguments are given, then filenames are taken from the keywords. + If arguments are passed in with no keywords, then stored file names are + arr_0, arr_1, etc. + Parameters + ---------- + file : string + File name of .npz file. + args : Arguments + Function arguments. + kwds : Keyword arguments + Keywords. + + See Also + -------- + numpy.savez : Save several arrays into an uncompressed .npz file format + + """ + _savez(file, args, kwds, True) + +def _savez(file, args, kwds, compress): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. import zipfile @@ -457,7 +526,12 @@ def savez(file, *args, **kwds): raise ValueError, "Cannot use un-named variables and keyword %s" % key namedict[key] = val - zip = zipfile.ZipFile(file, mode="w") + if compress: + compression = zipfile.ZIP_DEFLATED + else: + compression = zipfile.ZIP_STORED + + zip = zipfile_factory(file, mode="w", compression=compression) # Stage arrays in a temporary file on disk, before writing to zip. fd, tmpfile = tempfile.mkstemp(suffix='-numpy.npy') @@ -586,9 +660,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, if usecols is not None: usecols = list(usecols) - isstring = False + own_fh = False if _is_string_like(fname): - isstring = True + own_fh = True if fname.endswith('.gz'): import gzip fh = seek_gzip_factory(fname) @@ -676,7 +750,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None, # Convert each value according to its column and store X.append(tuple([conv(val) for (conv, val) in zip(converters, vals)])) finally: - if isstring: + if own_fh: fh.close() if len(dtype_types) > 1: @@ -798,7 +872,9 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'): fmt = asstr(fmt) delimiter = asstr(delimiter) + own_fh = False if _is_string_like(fname): + own_fh = True if fname.endswith('.gz'): import gzip fh = gzip.open(fname, 'wb') @@ -812,39 +888,43 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'): else: raise ValueError('fname must be a string or file handle') - X = np.asarray(X) + try: + X = np.asarray(X) - # Handle 1-dimensional arrays - if X.ndim == 1: - # Common case -- 1d array of numbers - if X.dtype.names is None: - X = np.atleast_2d(X).T - ncol = 1 + # Handle 1-dimensional arrays + if X.ndim == 1: + # Common case -- 1d array of numbers + if X.dtype.names is None: + X = np.atleast_2d(X).T + ncol = 1 - # Complex dtype -- each field indicates a separate column - else: - ncol = len(X.dtype.descr) - else: - ncol = X.shape[1] - - # `fmt` can be a string with multiple insertion points or a list of formats. - # E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') - if type(fmt) in (list, tuple): - if len(fmt) != ncol: - raise AttributeError('fmt has wrong shape. %s' % str(fmt)) - format = asstr(delimiter).join(map(asstr, fmt)) - elif type(fmt) is str: - if fmt.count('%') == 1: - fmt = [fmt, ]*ncol - format = delimiter.join(fmt) - elif fmt.count('%') != ncol: - raise AttributeError('fmt has wrong number of %% formats. %s' - % fmt) + # Complex dtype -- each field indicates a separate column + else: + ncol = len(X.dtype.descr) else: - format = fmt + ncol = X.shape[1] + + # `fmt` can be a string with multiple insertion points or a + # list of formats. E.g. '%10.5f\t%10d' or ('%10.5f', '$10d') + if type(fmt) in (list, tuple): + if len(fmt) != ncol: + raise AttributeError('fmt has wrong shape. %s' % str(fmt)) + format = asstr(delimiter).join(map(asstr, fmt)) + elif type(fmt) is str: + if fmt.count('%') == 1: + fmt = [fmt, ]*ncol + format = delimiter.join(fmt) + elif fmt.count('%') != ncol: + raise AttributeError('fmt has wrong number of %% formats. %s' + % fmt) + else: + format = fmt - for row in X: - fh.write(asbytes(format % tuple(row) + newline)) + for row in X: + fh.write(asbytes(format % tuple(row) + newline)) + finally: + if own_fh: + fh.close() import re def fromregex(file, regexp, dtype): @@ -902,25 +982,32 @@ def fromregex(file, regexp, dtype): array([1312, 1534, 444], dtype=int64) """ + own_fh = False if not hasattr(file, "read"): file = open(file, 'rb') - if not hasattr(regexp, 'match'): - regexp = re.compile(asbytes(regexp)) - if not isinstance(dtype, np.dtype): - dtype = np.dtype(dtype) + own_fh = True - seq = regexp.findall(file.read()) - if seq and not isinstance(seq[0], tuple): - # Only one group is in the regexp. - # Create the new array as a single data-type and then - # re-interpret as a single-field structured array. - newdtype = np.dtype(dtype[dtype.names[0]]) - output = np.array(seq, dtype=newdtype) - output.dtype = dtype - else: - output = np.array(seq, dtype=dtype) + try: + if not hasattr(regexp, 'match'): + regexp = re.compile(asbytes(regexp)) + if not isinstance(dtype, np.dtype): + dtype = np.dtype(dtype) + + seq = regexp.findall(file.read()) + if seq and not isinstance(seq[0], tuple): + # Only one group is in the regexp. + # Create the new array as a single data-type and then + # re-interpret as a single-field structured array. + newdtype = np.dtype(dtype[dtype.names[0]]) + output = np.array(seq, dtype=newdtype) + output.dtype = dtype + else: + output = np.array(seq, dtype=dtype) - return output + return output + finally: + if own_fh: + fh.close() @@ -1092,8 +1179,10 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, raise TypeError(errmsg % type(user_converters)) # Initialize the filehandle, the LineSplitter and the NameValidator + own_fhd = False if isinstance(fname, basestring): fhd = np.lib._datasource.open(fname, 'U') + own_fhd = True elif not hasattr(fname, 'read'): raise TypeError("The input should be a string or a filehandle. "\ "(got %s instead)" % type(fname)) @@ -1354,6 +1443,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, append_to_masks(tuple([v.strip() in m for (v, m) in zip(values, missing_values)])) + if own_fhd: + fhd.close() # Upgrade the converters (if needed) if dtype is None: |