author    Charles Harris <charlesr.harris@gmail.com>    2014-10-12 13:01:57 -0600
committer Charles Harris <charlesr.harris@gmail.com>    2014-10-12 17:40:49 -0600
commit    8f068b7866a2959392fae2a15c1a5a19ff79bae9 (patch)
tree      d4a5c17802b5cb8e63fe0546cb93d4ded9343edf /numpy/lib
parent    017969b193a135cd3bb63d5704bd11b62449f34e (diff)
BUG: Make python2 *.npy files readable in python3.
Python2-generated *.npy files contain headers with long integer literals like '1L' that break parsing in Python3. The fix is to filter out the 'L' suffix and let safe_eval take care of the integer type when converting the string. The fix comes from Nathaniel Smith with a few added fixups. Closes #5170.
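For context, a minimal sketch of the failure mode (illustration only; the header string below is made up, and ast.literal_eval stands in for numpy's safe_eval):

    import ast

    # A header written by Python2's repr() carries 'L' suffixes on ints.
    header = "{'descr': '<i8', 'fortran_order': False, 'shape': (10L, 3L), }"
    try:
        ast.literal_eval(header)
    except SyntaxError:
        # Python3 rejects the long-literal syntax outright.
        print("cannot parse a Python2-written header on Python3")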
Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/format.py | 42
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 98743b6ad..7c8dfbafa 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -141,7 +141,7 @@ import sys
import io
import warnings
from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, isfileobj, long, basestring
+from numpy.compat import asbytes, asstr, isfileobj, long, basestring
if sys.version_info[0] >= 3:
import pickle
@@ -410,6 +410,45 @@ def read_array_header_2_0(fp):
"""
_read_array_header(fp, version=(2, 0))
+
+def _filter_header(s):
+ """Clean up 'L' in npz header ints.
+
+ Cleans up the 'L' in strings representing integers. Needed to allow npz
+ headers produced in Python2 to be read in Python3.
+
+ Parameters
+ ----------
+ s : byte string
+ Npy file header.
+
+ Returns
+ -------
+ header : str
+ Cleaned up header.
+
+ """
+ import tokenize
+ if sys.version_info[0] >= 3:
+ from io import StringIO
+ else:
+ from StringIO import StringIO
+
+ tokens = []
+ last_token_was_number = False
+ for token in tokenize.generate_tokens(StringIO(asstr(s)).readline):
+ token_type = token[0]
+ token_string = token[1]
+ if (last_token_was_number and
+ token_type == tokenize.NAME and
+ token_string == "L"):
+ continue
+ else:
+ tokens.append(token)
+ last_token_was_number = (token_type == tokenize.NUMBER)
+ return tokenize.untokenize(tokens)
+
+
def _read_array_header(fp, version):
"""
see read_array_header_1_0
@@ -434,6 +473,7 @@ def _read_array_header(fp, version):
# "shape" : tuple of int
# "fortran_order" : bool
# "descr" : dtype.descr
+ header = _filter_header(header)
try:
d = safe_eval(header)
except SyntaxError as e:
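A usage sketch of the new helper (illustration only; _filter_header is a private function, and the header bytes below are made up):

    import ast
    from numpy.lib.format import _filter_header

    # A Python2-style header round-trips to something Python3 can parse.
    s = b"{'descr': '<i8', 'fortran_order': False, 'shape': (10L, 3L), }"
    cleaned = _filter_header(s)                  # 'L' suffixes are stripped
    print(ast.literal_eval(cleaned)['shape'])    # -> (10, 3)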