Add memory map support to `load` [patch by Gael Varoquaux]. Closes #954.

author: Stefan van der Walt <stefan@sun.ac.za> 2008-11-29 12:07:07 +0000
committer: Stefan van der Walt <stefan@sun.ac.za> 2008-11-29 12:07:07 +0000
commit: 17206034a2abc6b1961f41fa1e4fa65a0ae93ca1 (patch)
tree: e4d380a19962dbd2d373335bda47f082c3fc1b10 /numpy/lib/io.py
parent: 9940ab08630129047a5a460466d880369a6f571d (diff)
download: numpy-17206034a2abc6b1961f41fa1e4fa65a0ae93ca1.tar.gz
1 file changed, 29 insertions, 14 deletions
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index deb86fb61..3d41abeef 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -79,7 +79,7 @@ class NpzFile(object):
         else:
             raise KeyError, "%s is not a file in the archive" % key
 
-def load(file, memmap=False):
+def load(file, mmap_mode=None):
     """
     Load a pickled, ``.npy``, or ``.npz`` binary file.
 
@@ -87,10 +87,15 @@ def load(file, memmap=False):
     ----------
     file : file-like object or string
         The file to read.  It must support ``seek()`` and ``read()`` methods.
-    memmap : bool
-        If True, then memory-map the ``.npy`` file (or unzip the ``.npz`` file
-        into a temporary directory and memory-map each component).  This has
-        no effect for a pickled file.
+    mmap_mode : {None, 'r+', 'r', 'w+', 'c'}, optional
+        If not None, then memory-map the file, using the given mode
+        (see `numpy.memmap`).  The mode has no effect for pickled or
+        zipped files.
+        A memory-mapped array is stored on disk, and not directly loaded
+        into memory.  However, it can be accessed and sliced like any
+        ndarray.  Memory mapping is especially useful for accessing
+        small fragments of large files without reading the entire file
+        into memory.
 
     Returns
     -------
@@ -104,18 +109,28 @@ def load(file, memmap=False):
 
     Notes
     -----
-    - If file contains pickle data, then whatever is stored in the
+    - If the file contains pickle data, then whatever is stored in the
       pickle is returned.
     - If the file is a ``.npy`` file, then an array is returned.
     - If the file is a ``.npz`` file, then a dictionary-like object is
-      returned, containing {filename: array} key-value pairs, one for
-      every file in the archive.
+      returned, containing ``{filename: array}`` key-value pairs, one for
+      each file in the archive.
 
     Examples
     --------
-    >>> np.save('/tmp/123', np.array([1, 2, 3])
+    Store data to disk, and load it again:
+
+    >>> np.save('/tmp/123', np.array([[1, 2, 3], [4, 5, 6]]))
     >>> np.load('/tmp/123.npy')
-    array([1, 2, 3])
+    array([[1, 2, 3],
+           [4, 5, 6]])
+
+    Mem-map the stored array, and then access the second row
+    directly from disk:
+
+    >>> X = np.load('/tmp/123.npy', mmap_mode='r')
+    >>> X[1, :]
+    memmap([4, 5, 6])
 
     """
     if isinstance(file, basestring):
@@ -123,9 +138,6 @@ def load(file, memmap=False):
     else:
         fid = file
 
-    if memmap:
-        raise NotImplementedError
-
     # Code to distinguish from NumPy binary files and pickles.
     _ZIP_PREFIX = 'PK\x03\x04'
     N = len(format.MAGIC_PREFIX)
@@ -134,7 +146,10 @@ def load(file, memmap=False):
     if magic.startswith(_ZIP_PREFIX):  # zip-file (assume .npz)
         return NpzFile(fid)
     elif magic == format.MAGIC_PREFIX: # .npy file
-        return format.read_array(fid)
+        if mmap_mode:
+            return format.open_memmap(file, mode=mmap_mode)
+        else:
+            return format.read_array(fid)
     else:  # Try a pickle
         try:
             return _cload(fid)
author	Stefan van der Walt <stefan@sun.ac.za>	2008-11-29 12:07:07 +0000
committer	Stefan van der Walt <stefan@sun.ac.za>	2008-11-29 12:07:07 +0000
commit	17206034a2abc6b1961f41fa1e4fa65a0ae93ca1 (patch)
tree	e4d380a19962dbd2d373335bda47f082c3fc1b10 /numpy/lib/io.py
parent	9940ab08630129047a5a460466d880369a6f571d (diff)
download	numpy-17206034a2abc6b1961f41fa1e4fa65a0ae93ca1.tar.gz