diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2017-10-15 15:45:29 -0600 |
---|---|---|
committer | GitHub <noreply@github.com> | 2017-10-15 15:45:29 -0600 |
commit | ebf4a6c1d19bff4fa8ea389836bdd15008e4bbfd (patch) | |
tree | c10c507dad2d754cf055774a4eb174c8b431aeec | |
parent | 3f300baa3644c39416d60b8c117b4c57e9445005 (diff) | |
parent | 52c1ef6ff7cfc697930f9bf4f1eebc59ee7f538e (diff) | |
download | numpy-ebf4a6c1d19bff4fa8ea389836bdd15008e4bbfd.tar.gz |
Merge pull request #9863 from serhiy-storchaka/savez-no-tmpfile
ENH: Save to ZIP files without using temporary files.
-rw-r--r-- | doc/release/1.14.0-notes.rst | 5 | ||||
-rw-r--r-- | numpy/lib/npyio.py | 55 |
2 files changed, 38 insertions, 22 deletions
diff --git a/doc/release/1.14.0-notes.rst b/doc/release/1.14.0-notes.rst index 66ae50769..5dbd9b44c 100644 --- a/doc/release/1.14.0-notes.rst +++ b/doc/release/1.14.0-notes.rst @@ -263,6 +263,11 @@ common cache line size. This makes ``npy`` files easier to use in programs which open them with ``mmap``, especially on Linux where an ``mmap`` offset must be a multiple of the page size. +NPZ files now can be written without using temporary files +---------------------------------------------------------- +In Python 3.6+ ``numpy.savez`` and ``numpy.savez_compressed`` now write +directly to a ZIP file, without creating intermediate temporary files. + Better support for empty structured and string types ---------------------------------------------------- Structured types can contain zero fields, and string dtypes can contain zero diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index 7598b2c6b..96355ebc8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -661,8 +661,6 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): # Import is postponed to here since zipfile depends on gzip, an optional # component of the so-called standard library. import zipfile - # Import deferred for startup time improvement - import tempfile if isinstance(file, basestring): if not file.endswith('.npz'): @@ -686,31 +684,44 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None): zipf = zipfile_factory(file, mode="w", compression=compression) - # Stage arrays in a temporary file on disk, before writing to zip. - - # Since target file might be big enough to exceed capacity of a global - # temporary directory, create temp file side-by-side with the target file. - file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') - fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') - os.close(fd) - try: + if sys.version_info >= (3, 6): + # Since Python 3.6 it is possible to write directly to a ZIP file. for key, val in namedict.items(): fname = key + '.npy' - fid = open(tmpfile, 'wb') - try: - format.write_array(fid, np.asanyarray(val), + val = np.asanyarray(val) + force_zip64 = val.nbytes >= 2**30 + with zipf.open(fname, 'w', force_zip64=force_zip64) as fid: + format.write_array(fid, val, allow_pickle=allow_pickle, pickle_kwargs=pickle_kwargs) - fid.close() - fid = None - zipf.write(tmpfile, arcname=fname) - except IOError as exc: - raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) - finally: - if fid: + else: + # Stage arrays in a temporary file on disk, before writing to zip. + + # Import deferred for startup time improvement + import tempfile + # Since target file might be big enough to exceed capacity of a global + # temporary directory, create temp file side-by-side with the target file. + file_dir, file_prefix = os.path.split(file) if _is_string_like(file) else (None, 'tmp') + fd, tmpfile = tempfile.mkstemp(prefix=file_prefix, dir=file_dir, suffix='-numpy.npy') + os.close(fd) + try: + for key, val in namedict.items(): + fname = key + '.npy' + fid = open(tmpfile, 'wb') + try: + format.write_array(fid, np.asanyarray(val), + allow_pickle=allow_pickle, + pickle_kwargs=pickle_kwargs) fid.close() - finally: - os.remove(tmpfile) + fid = None + zipf.write(tmpfile, arcname=fname) + except IOError as exc: + raise IOError("Failed to write to %s: %s" % (tmpfile, exc)) + finally: + if fid: + fid.close() + finally: + os.remove(tmpfile) zipf.close() |