ENH: add options for disabling use of pickle in load/save

author: Pauli Virtanen <pav@iki.fi> 2015-04-17 14:29:43 +0300
committer: Pauli Virtanen <pav@iki.fi> 2015-04-18 22:06:07 +0300
commit: a2bd3a7eabfe053d6d16a2130fdcad9e5211f6bb (patch)
tree: 4de542b4f706b39a2794f58cccdde2442a163528 /numpy/lib/npyio.py
parent: 0752872542593bb8703977d5d1e458dbca0cb712 (diff)
download: numpy-a2bd3a7eabfe053d6d16a2130fdcad9e5211f6bb.tar.gz
1 files changed, 36 insertions, 8 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 5ebeae6c3..bab30355c 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -164,6 +164,8 @@ class NpzFile(object):
     f : BagObj instance
         An object on which attribute can be performed as an alternative
         to getitem access on the `NpzFile` instance itself.
+    allow_pickle : bool, optional
+        Allow loading pickled data. Default: True
     pickle_kwargs : dict, optional
         Additional keyword arguments to pass on to pickle.load.
         These are only useful when loading object arrays saved on
@@ -199,12 +201,14 @@ class NpzFile(object):
 
     """
 
-    def __init__(self, fid, own_fid=False, pickle_kwargs=None):
+    def __init__(self, fid, own_fid=False, allow_pickle=True,
+                 pickle_kwargs=None):
         # Import is postponed to here since zipfile depends on gzip, an
         # optional component of the so-called standard library.
         _zip = zipfile_factory(fid)
         self._files = _zip.namelist()
         self.files = []
+        self.allow_pickle = allow_pickle
         self.pickle_kwargs = pickle_kwargs
         for x in self._files:
             if x.endswith('.npy'):
@@ -262,6 +266,7 @@ class NpzFile(object):
             if magic == format.MAGIC_PREFIX:
                 bytes = self.zip.open(key)
                 return format.read_array(bytes,
+                                         allow_pickle=self.allow_pickle,
                                          pickle_kwargs=self.pickle_kwargs)
             else:
                 return self.zip.read(key)
@@ -295,7 +300,8 @@ class NpzFile(object):
         return self.files.__contains__(key)
 
 
-def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
+def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
+         encoding='ASCII'):
     """
     Load arrays or pickled objects from ``.npy``, ``.npz`` or pickled files.
 
@@ -312,6 +318,12 @@ def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
         and sliced like any ndarray.  Memory mapping is especially useful
         for accessing small fragments of large files without reading the
         entire file into memory.
+    allow_pickle : bool, optional
+        Allow loading pickled object arrays stored in npy files. Reasons for
+        disallowing pickles include security, as loading pickled data can
+        execute arbitrary code. If pickles are disallowed, loading object
+        arrays will fail.
+        Default: True
     fix_imports : bool, optional
         Only useful when loading Python 2 generated pickled files on Python 3,
         which includes npy/npz files containing object arrays. If `fix_imports`
@@ -324,7 +336,6 @@ def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
         'ASCII', and 'bytes' are not allowed, as they can corrupt numerical
         data. Default: 'ASCII'
 
-
     Returns
     -------
     result : array, tuple, dict, etc.
@@ -335,6 +346,8 @@ def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
     ------
     IOError
         If the input file does not exist or cannot be read.
+    ValueError
+        The file contains an object array, but allow_pickle=False given.
 
     See Also
     --------
@@ -430,15 +443,20 @@ def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
             # Transfer file ownership to NpzFile
             tmp = own_fid
             own_fid = False
-            return NpzFile(fid, own_fid=tmp, pickle_kwargs=pickle_kwargs)
+            return NpzFile(fid, own_fid=tmp, allow_pickle=allow_pickle,
+                           pickle_kwargs=pickle_kwargs)
         elif magic == format.MAGIC_PREFIX:
             # .npy file
             if mmap_mode:
                 return format.open_memmap(file, mode=mmap_mode)
             else:
-                return format.read_array(fid, pickle_kwargs=pickle_kwargs)
+                return format.read_array(fid, allow_pickle=allow_pickle,
+                                         pickle_kwargs=pickle_kwargs)
         else:
             # Try a pickle
+            if not allow_pickle:
+                raise ValueError("allow_pickle=False, but file does not contain "
+                                 "non-pickled data")
             try:
                 return pickle.load(fid, **pickle_kwargs)
             except:
@@ -449,7 +467,7 @@ def load(file, mmap_mode=None, fix_imports=True, encoding='ASCII'):
             fid.close()
 
 
-def save(file, arr, fix_imports=True):
+def save(file, arr, allow_pickle=True, fix_imports=True):
     """
     Save an array to a binary file in NumPy ``.npy`` format.
 
@@ -460,6 +478,14 @@ def save(file, arr, fix_imports=True):
         then the filename is unchanged.  If file is a string, a ``.npy``
         extension will be appended to the file name if it does not already
         have one.
+    allow_pickle : bool, optional
+        Allow saving object arrays using Python pickles. Reasons for disallowing
+        pickles include security (loading pickled data can execute arbitrary
+        code) and portability (pickled objects may not be loadable on different
+        Python installations, for example if the stored objects require libraries
+        that are not available, and not all pickled data is compatible between
+        Python 2 and Python 3).
+        Default: True
     fix_imports : bool, optional
         Only useful in forcing objects in object arrays on Python 3 to be
         pickled in a Python 2 compatible way. If `fix_imports` is True, pickle
@@ -509,7 +535,8 @@ def save(file, arr, fix_imports=True):
 
     try:
         arr = np.asanyarray(arr)
-        format.write_array(fid, arr, pickle_kwargs=pickle_kwargs)
+        format.write_array(fid, arr, allow_pickle=allow_pickle,
+                           pickle_kwargs=pickle_kwargs)
     finally:
         if own_fid:
             fid.close()
@@ -621,7 +648,7 @@ def savez_compressed(file, *args, **kwds):
     _savez(file, args, kwds, True)
 
 
-def _savez(file, args, kwds, compress, pickle_kwargs=None):
+def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
     # Import is postponed to here since zipfile depends on gzip, an optional
     # component of the so-called standard library.
     import zipfile
@@ -656,6 +683,7 @@ def _savez(file, args, kwds, compress, pickle_kwargs=None):
             fid = open(tmpfile, 'wb')
             try:
                 format.write_array(fid, np.asanyarray(val),
+                                   allow_pickle=allow_pickle,
                                    pickle_kwargs=pickle_kwargs)
                 fid.close()
                 fid = None
author	Pauli Virtanen <pav@iki.fi>	2015-04-17 14:29:43 +0300
committer	Pauli Virtanen <pav@iki.fi>	2015-04-18 22:06:07 +0300
commit	a2bd3a7eabfe053d6d16a2130fdcad9e5211f6bb (patch)
tree	4de542b4f706b39a2794f58cccdde2442a163528 /numpy/lib/npyio.py
parent	0752872542593bb8703977d5d1e458dbca0cb712 (diff)
download	numpy-a2bd3a7eabfe053d6d16a2130fdcad9e5211f6bb.tar.gz