Merge pull request #5178 from charris/fix-npz-header-incompatibility

Fix npz header incompatibility
author: Julian Taylor <juliantaylor108@gmail.com> 2014-10-13 20:12:38 +0200
committer: Julian Taylor <juliantaylor108@gmail.com> 2014-10-13 20:12:38 +0200
commit: 2f1786306e696709481ccfc95b844d5aa2700b6d (patch)
tree: d8d67febde9bda643339e8771fe941602128a776 /numpy/lib
parent: 2ba94e726c7adf5b6ea7f2e49aadb78fe8b1d7ba (diff)
parent: 8b1f90a227b5fcf2a481c973e522693758c9f20f (diff)
download: numpy-2f1786306e696709481ccfc95b844d5aa2700b6d.tar.gz
4 files changed, 51 insertions, 1 deletions
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 98743b6ad..7c8dfbafa 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -141,7 +141,7 @@ import sys
 import io
 import warnings
 from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, isfileobj, long, basestring
+from numpy.compat import asbytes, asstr, isfileobj, long, basestring
 
 if sys.version_info[0] >= 3:
     import pickle
@@ -410,6 +410,45 @@ def read_array_header_2_0(fp):
     """
     _read_array_header(fp, version=(2, 0))
 
+
+def _filter_header(s):
+    """Clean up 'L' in npz header ints.
+
+    Drops the 'L' suffix that follows integer literals in the header so
+    that npz headers produced in Python2 can be read in Python3.
+
+    Parameters
+    ----------
+    s : byte string
+        Npy file header.
+
+    Returns
+    -------
+    header : str
+        Cleaned up header.
+
+    """
+    import tokenize
+    if sys.version_info[0] >= 3:
+        from io import StringIO
+    else:
+        from StringIO import StringIO
+
+    tokens = []
+    last_token_was_number = False
+    # generate_tokens expects a readline-style callable (one line per call).
+    for token in tokenize.generate_tokens(StringIO(asstr(s)).readline):
+        token_type = token[0]
+        token_string = token[1]
+        if (last_token_was_number and
+                token_type == tokenize.NAME and
+                token_string == "L"):
+            continue
+        tokens.append(token)
+        last_token_was_number = (token_type == tokenize.NUMBER)
+    return tokenize.untokenize(tokens)
+
+ 
 def _read_array_header(fp, version):
     """
     see read_array_header_1_0
@@ -434,6 +473,7 @@ def _read_array_header(fp, version):
     #   "shape" : tuple of int
     #   "fortran_order" : bool
     #   "descr" : dtype.descr
+    header = _filter_header(header)
     try:
         d = safe_eval(header)
     except SyntaxError as e:
diff --git a/numpy/lib/tests/data/python3.npy b/numpy/lib/tests/data/python3.npy
new file mode 100644
index 000000000..7c6997dd6
--- /dev/null
+++ b/numpy/lib/tests/data/python3.npy
diff --git a/numpy/lib/tests/data/win64python2.npy b/numpy/lib/tests/data/win64python2.npy
new file mode 100644
index 000000000..d9bc36af7
--- /dev/null
+++ b/numpy/lib/tests/data/win64python2.npy
diff --git a/numpy/lib/tests/test_format.py b/numpy/lib/tests/test_format.py
index c09386789..ee77386bc 100644
--- a/numpy/lib/tests/test_format.py
+++ b/numpy/lib/tests/test_format.py
@@ -524,6 +524,16 @@ def test_compressed_roundtrip():
     assert_array_equal(arr, arr1)
 
 
+def test_python2_python3_interoperability():
+    # Load the fixture whose name refers to the other major Python version.
+    running_py3 = sys.version_info[0] >= 3
+    fname = 'win64python2.npy' if running_py3 else 'python3.npy'
+    data_dir = os.path.join(os.path.dirname(__file__), 'data')
+    loaded = np.load(os.path.join(data_dir, fname))
+    expected = np.ones(2)
+    assert_array_equal(loaded, expected)
+
+
 def test_version_2_0():
     f = BytesIO()
     # requires more than 2 byte for header
author	Julian Taylor <juliantaylor108@gmail.com>	2014-10-13 20:12:38 +0200
committer	Julian Taylor <juliantaylor108@gmail.com>	2014-10-13 20:12:38 +0200
commit	2f1786306e696709481ccfc95b844d5aa2700b6d (patch)
tree	d8d67febde9bda643339e8771fe941602128a776 /numpy/lib
parent	2ba94e726c7adf5b6ea7f2e49aadb78fe8b1d7ba (diff)
parent	8b1f90a227b5fcf2a481c973e522693758c9f20f (diff)
download	numpy-2f1786306e696709481ccfc95b844d5aa2700b6d.tar.gz