ENH: Add support for pathlib.Path objects to save/load functions

author: Wendell Smith <wackywendell@gmail.com> 2015-10-26 17:34:00 -0400
committer: Wendell Smith <wackywendell@gmail.com> 2016-04-06 22:17:40 -0400
commit: 5ac270b06e411dd0e13108ed5dafad31d5ab589d (patch)
tree: 7d3edfe92ad8d53b4db3155ce4fc6f02ee3c39fd
parent: 537d35c2cf49cae0a496c37564fa282ec80e3695 (diff)
download: numpy-5ac270b06e411dd0e13108ed5dafad31d5ab589d.tar.gz
6 files changed, 180 insertions, 19 deletions
diff --git a/doc/release/1.12.0-notes.rst b/doc/release/1.12.0-notes.rst
index b9e405154..c43c818fa 100644
--- a/doc/release/1.12.0-notes.rst
+++ b/doc/release/1.12.0-notes.rst
@@ -169,6 +169,12 @@ The *__complex__* method has been implemented on the ndarray object
 Calling ``complex()`` on a size 1 array will now cast to a python
 complex.
 
+``pathlib.Path`` objects now supported
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+The standard ``np.load``, ``np.save``, ``np.loadtxt``, ``np.savez``, and similar
+functions, and ``np.memmap``, now accept ``pathlib.Path`` objects in place of a
+filename or open file object.
+
 
 Changes
 =======
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py
index d95a362ca..992ea50e6 100644
--- a/numpy/compat/py3k.py
+++ b/numpy/compat/py3k.py
@@ -7,9 +7,13 @@ from __future__ import division, absolute_import, print_function
 __all__ = ['bytes', 'asbytes', 'isfileobj', 'getexception', 'strchar',
            'unicode', 'asunicode', 'asbytes_nested', 'asunicode_nested',
            'asstr', 'open_latin1', 'long', 'basestring', 'sixu',
-           'integer_types']
+           'integer_types', 'is_pathlib_path', 'Path']
 
 import sys
+try:
+    from pathlib import Path
+except ImportError:
+    Path = None
 
 if sys.version_info[0] >= 3:
     import io
@@ -86,3 +90,10 @@ def asunicode_nested(x):
         return [asunicode_nested(y) for y in x]
     else:
         return asunicode(x)
+
+
+def is_pathlib_path(obj):
+    """
+    Return True if obj is a pathlib.Path; always False if pathlib is missing.
+    """
+    return Path is not None and isinstance(obj, Path)
diff --git a/numpy/core/memmap.py b/numpy/core/memmap.py
index 827909c47..5f6182742 100644
--- a/numpy/core/memmap.py
+++ b/numpy/core/memmap.py
@@ -2,7 +2,7 @@ from __future__ import division, absolute_import, print_function
 
 import numpy as np
 from .numeric import uint8, ndarray, dtype
-from numpy.compat import long, basestring
+from numpy.compat import long, basestring, is_pathlib_path
 
 __all__ = ['memmap']
 
@@ -39,7 +39,7 @@ class memmap(ndarray):
 
     Parameters
     ----------
-    filename : str or file-like object
+    filename : str, file-like object, or pathlib.Path instance
         The file name or file object to be used as the array data buffer.
     dtype : data-type, optional
         The data-type used to interpret the file contents.
@@ -82,7 +82,7 @@ class memmap(ndarray):
 
     Attributes
     ----------
-    filename : str
+    filename : str or pathlib.Path instance
         Path to the mapped file.
     offset : int
         Offset position in the file.
@@ -213,6 +213,9 @@ class memmap(ndarray):
         if hasattr(filename, 'read'):
             fid = filename
             own_file = False
+        elif is_pathlib_path(filename):
+            fid = filename.open((mode == 'c' and 'r' or mode)+'b')
+            own_file = True
         else:
             fid = open(filename, (mode == 'c' and 'r' or mode)+'b')
             own_file = True
@@ -267,6 +270,8 @@ class memmap(ndarray):
 
         if isinstance(filename, basestring):
             self.filename = os.path.abspath(filename)
+        elif is_pathlib_path(filename):
+            self.filename = filename.resolve()
         # py3 returns int for TemporaryFile().name
         elif (hasattr(filename, "name") and
               isinstance(filename.name, basestring)):
diff --git a/numpy/core/tests/test_memmap.py b/numpy/core/tests/test_memmap.py
index 47f58ea7e..4aa02e26f 100644
--- a/numpy/core/tests/test_memmap.py
+++ b/numpy/core/tests/test_memmap.py
@@ -7,6 +7,7 @@ from tempfile import NamedTemporaryFile, TemporaryFile, mktemp, mkdtemp
 
 from numpy import (
     memmap, sum, average, product, ndarray, isscalar, add, subtract, multiply)
+from numpy.compat import Path
 
 from numpy import arange, allclose, asarray
 from numpy.testing import (
@@ -73,6 +74,19 @@ class TestMemmap(TestCase):
         del b
         del fp
 
+    @dec.skipif(Path is None, "No pathlib.Path")
+    def test_path(self):
+        tmpname = mktemp('', 'mmap', dir=self.tempdir)
+        fp = memmap(Path(tmpname), dtype=self.dtype, mode='w+',
+                    shape=self.shape)
+        # memmap stores Path.resolve(), which follows symlinks; compare alike.
+        abspath = str(Path(tmpname).resolve())
+        fp[:] = self.data[:]
+        self.assertEqual(abspath, str(fp.filename))
+        b = fp[:1]
+        self.assertEqual(abspath, str(b.filename))
+        del b, fp
+
+
     def test_filename_fileobj(self):
         fp = memmap(self.tmpfp, dtype=self.dtype, mode="w+",
                     shape=self.shape)
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index a6e4a8dac..4b6770483 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -14,12 +14,12 @@ from ._datasource import DataSource
 from numpy.core.multiarray import packbits, unpackbits
 from ._iotools import (
     LineSplitter, NameValidator, StringConverter, ConverterError,
-    ConverterLockError, ConversionWarning, _is_string_like, has_nested_fields,
-    flatten_dtype, easy_dtype, _bytes_to_name
+    ConverterLockError, ConversionWarning, _is_string_like,
+    has_nested_fields, flatten_dtype, easy_dtype, _bytes_to_name
     )
 
 from numpy.compat import (
-    asbytes, asstr, asbytes_nested, bytes, basestring, unicode
+    asbytes, asstr, asbytes_nested, bytes, basestring, unicode, is_pathlib_path
     )
 
 if sys.version_info[0] >= 3:
@@ -86,10 +86,19 @@ class BagObj(object):
         return object.__getattribute__(self, '_obj').keys()
 
 
-def zipfile_factory(*args, **kwargs):
+def zipfile_factory(file, *args, **kwargs):
+    """
+    Create a ZipFile with Zip64 extensions always enabled.
+
+    `file` may be a file object, a str, or a pathlib.Path (converted to str
+    first); `args` and `kwargs` are passed on to the zipfile.ZipFile
+    constructor, with ``allowZip64`` forced to True.
+    """
+    if is_pathlib_path(file):
+        file = str(file)
     import zipfile
     kwargs['allowZip64'] = True
-    return zipfile.ZipFile(*args, **kwargs)
+    return zipfile.ZipFile(file, *args, **kwargs)
 
 
 class NpzFile(object):
@@ -261,7 +270,7 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
 
     Parameters
     ----------
-    file : file-like object or string
+    file : file-like object, string, or pathlib.Path
         The file to read. File-like objects must support the
         ``seek()`` and ``read()`` methods. Pickled files require that the
         file-like object support the ``readline()`` method as well.
@@ -355,12 +364,13 @@ def load(file, mmap_mode=None, allow_pickle=True, fix_imports=True,
     memmap([4, 5, 6])
 
     """
-    import gzip
-
     own_fid = False
     if isinstance(file, basestring):
         fid = open(file, "rb")
         own_fid = True
+    elif is_pathlib_path(file):
+        fid = file.open("rb")
+        own_fid = True
     else:
         fid = file
 
@@ -425,9 +435,9 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
 
     Parameters
     ----------
-    file : file or str
+    file : file, str, or pathlib.Path
         File or filename to which the data is saved.  If file is a file-object,
-        then the filename is unchanged.  If file is a string, a ``.npy``
+        then the filename is unchanged.  If file is a string or Path, a ``.npy``
         extension will be appended to the file name if it does not already
         have one.
     allow_pickle : bool, optional
@@ -476,6 +486,11 @@ def save(file, arr, allow_pickle=True, fix_imports=True):
             file = file + '.npy'
         fid = open(file, "wb")
         own_fid = True
+    elif is_pathlib_path(file):
+        if not file.name.endswith('.npy'):
+            file = file.parent / (file.name + '.npy')
+        fid = file.open("wb")
+        own_fid = True
     else:
         fid = file
 
@@ -507,8 +522,9 @@ def savez(file, *args, **kwds):
     ----------
     file : str or file
         Either the file name (string) or an open file (file-like object)
-        where the data will be saved. If file is a string, the ``.npz``
-        extension will be appended to the file name if it is not already there.
+        where the data will be saved. If file is a string or a Path, the
+        ``.npz`` extension will be appended to the file name if it is not
+        already there.
     args : Arguments, optional
         Arrays to save to the file. Since it is not possible for Python to
         know the names of the arrays outside `savez`, the arrays will be saved
@@ -610,6 +626,9 @@ def _savez(file, args, kwds, compress, allow_pickle=True, pickle_kwargs=None):
     if isinstance(file, basestring):
         if not file.endswith('.npz'):
             file = file + '.npz'
+    elif is_pathlib_path(file):
+        if not file.name.endswith('.npz'):
+            file = file.parent / (file.name + '.npz')
 
     namedict = kwds
     for i, val in enumerate(args):
@@ -695,7 +714,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
     Parameters
     ----------
-    fname : file or str
+    fname : file, str, or pathlib.Path
         File, filename, or generator to read.  If the filename extension is
         ``.gz`` or ``.bz2``, the file is first decompressed. Note that
         generators should return byte strings for Python 3k.
@@ -822,6 +841,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
     fown = False
     try:
+        if is_pathlib_path(fname):
+            fname = str(fname)
         if _is_string_like(fname):
             fown = True
             if fname.endswith('.gz'):
@@ -1117,6 +1138,8 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n', header='',
     delimiter = asstr(delimiter)
 
     own_fh = False
+    if is_pathlib_path(fname):
+        fname = str(fname)
     if _is_string_like(fname):
         own_fh = True
         if fname.endswith('.gz'):
@@ -1302,7 +1325,7 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     Parameters
     ----------
-    fname : file, str, list of str, generator
+    fname : file, str, pathlib.Path, list of str, generator
         File, filename, list, or generator to read.  If the filename
         extension is `.gz` or `.bz2`, the file is first decompressed. Mote
         that generators must return byte strings in Python 3k.  The strings
@@ -1477,6 +1500,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     # Initialize the filehandle, the LineSplitter and the NameValidator
     own_fhd = False
     try:
+        if is_pathlib_path(fname):
+            fname = str(fname)
         if isinstance(fname, basestring):
             if sys.version_info[0] == 2:
                 fhd = iter(np.lib._datasource.open(fname, 'rbU'))
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index c0f8c1953..720f2e74e 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -14,7 +14,7 @@ from datetime import datetime
 import numpy as np
 import numpy.ma as ma
 from numpy.lib._iotools import ConverterError, ConversionWarning
-from numpy.compat import asbytes, bytes, unicode
+from numpy.compat import asbytes, bytes, unicode, Path
 from numpy.ma.testutils import assert_equal
 from numpy.testing import (
     TestCase, run_module_suite, assert_warns, assert_,
@@ -1829,6 +1829,106 @@ M   33  21.99
         assert_equal(test['f1'], 17179869184)
         assert_equal(test['f2'], 1024)
 
+
+class TestPathUsage(TestCase):
+    # Test that pathlib.Path can be used
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_loadtxt(self):
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            a = np.array([[1.1, 2], [3, 4]])
+            np.savetxt(path, a)
+            x = np.loadtxt(path)
+            assert_array_equal(x, a)
+
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_save_load(self):
+        # Test that pathlib.Path instances can be used with save and load.
+        with temppath(suffix='.npy') as path:
+            path = Path(path)
+            a = np.array([[1, 2], [3, 4]], int)
+            np.save(path, a)
+            data = np.load(path)
+            assert_array_equal(data, a)
+
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_savez_load(self):
+        # Test that pathlib.Path instances can be used with savez.
+        with temppath(suffix='.npz') as path:
+            path = Path(path)
+            np.savez(path, lab='place holder')
+            with np.load(path) as data:
+                assert_array_equal(data['lab'], 'place holder')
+
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_savez_compressed_load(self):
+        # Test that pathlib.Path instances can be used with savez_compressed.
+        with temppath(suffix='.npz') as path:
+            path = Path(path)
+            np.savez_compressed(path, lab='place holder')
+            data = np.load(path)
+            assert_array_equal(data['lab'], 'place holder')
+            data.close()
+    
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_genfromtxt(self):
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            a = np.array([(1, 2), (3, 4)])
+            np.savetxt(path, a)
+            data = np.genfromtxt(path)
+            assert_array_equal(a, data)
+    
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_ndfromtxt(self):
+        # Test outputting a standard ndarray
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            with path.open('w') as f:
+                f.write('1 2\n3 4')
+            control = np.array([[1, 2], [3, 4]], dtype=int)
+            test = np.ndfromtxt(path, dtype=int)
+            assert_array_equal(test, control)
+    
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_mafromtxt(self):
+        # From `test_fancy_dtype_alt` above
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            with path.open('w') as f:
+                f.write('1,2,3.0\n4,5,6.0\n')
+                
+            test = np.mafromtxt(path, delimiter=',')
+            control = ma.array([(1.0, 2.0, 3.0), (4.0, 5.0, 6.0)])
+            assert_equal(test, control)
+    
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_recfromtxt(self):
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            with path.open('w') as f:
+                f.write('A,B\n0,1\n2,3')
+            kwargs = dict(delimiter=",", missing_values="N/A", names=True)
+            test = np.recfromtxt(path, **kwargs)
+            control = np.array([(0, 1), (2, 3)],
+                               dtype=[('A', np.int), ('B', np.int)])
+            self.assertTrue(isinstance(test, np.recarray))
+            assert_equal(test, control)
+    
+    @np.testing.dec.skipif(Path is None, "No pathlib.Path")
+    def test_recfromcsv(self):
+        with temppath(suffix='.txt') as path:
+            path = Path(path)
+            with path.open('w') as f:
+                f.write('A,B\n0,1\n2,3')
+            kwargs = dict(missing_values="N/A", names=True, case_sensitive=True)
+            test = np.recfromcsv(path, dtype=None, **kwargs)
+            control = np.array([(0, 1), (2, 3)],
+                               dtype=[('A', np.int), ('B', np.int)])
+            self.assertTrue(isinstance(test, np.recarray))
+            assert_equal(test, control)
+
+
 def test_gzip_load():
     a = np.random.random((5, 5))
author	Wendell Smith <wackywendell@gmail.com>	2015-10-26 17:34:00 -0400
committer	Wendell Smith <wackywendell@gmail.com>	2016-04-06 22:17:40 -0400
commit	5ac270b06e411dd0e13108ed5dafad31d5ab589d (patch)
tree	7d3edfe92ad8d53b4db3155ce4fc6f02ee3c39fd
parent	537d35c2cf49cae0a496c37564fa282ec80e3695 (diff)
download	numpy-5ac270b06e411dd0e13108ed5dafad31d5ab589d.tar.gz