author    Charles Harris <charlesr.harris@gmail.com>    2014-10-12 13:01:57 -0600
committer Charles Harris <charlesr.harris@gmail.com>    2014-10-12 17:40:49 -0600
commit    8f068b7866a2959392fae2a15c1a5a19ff79bae9 (patch)
tree      d4a5c17802b5cb8e63fe0546cb93d4ded9343edf /numpy/lib
parent    017969b193a135cd3bb63d5704bd11b62449f34e (diff)
BUG: Make python2 *.npy files readable in python3.
Python2-generated *.npy files contain headers with long integer literals like '1L' that break parsing in Python3. The fix is to filter out the 'L' suffix and let safe_eval take care of the integer type when converting the string. The fix comes from Nathaniel Smith with a few added fixups. Closes #5170.
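For context, a minimal sketch of the failure mode (illustration only; the header string below is made up, and ast.literal_eval stands in for numpy's safe_eval):

    import ast

    # A header written by Python2's repr() carries 'L' suffixes on ints.
    header = "{'descr': '<i8', 'fortran_order': False, 'shape': (10L, 3L), }"
    try:
        ast.literal_eval(header)
    except SyntaxError:
        # Python3 rejects the long-literal syntax outright.
        print("cannot parse a Python2-written header on Python3")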
Diffstat (limited to 'numpy/lib')
-rw-r--r--  numpy/lib/format.py | 42
1 file changed, 41 insertions(+), 1 deletion(-)
diff --git a/numpy/lib/format.py b/numpy/lib/format.py
index 98743b6ad..7c8dfbafa 100644
--- a/numpy/lib/format.py
+++ b/numpy/lib/format.py
@@ -141,7 +141,7 @@ import sys
import io
import warnings
from numpy.lib.utils import safe_eval
-from numpy.compat import asbytes, isfileobj, long, basestring
+from numpy.compat import asbytes, asstr, isfileobj, long, basestring
if sys.version_info[0] >= 3:
import pickle
@@ -410,6 +410,45 @@ def read_array_header_2_0(fp):
"""
_read_array_header(fp, version=(2, 0))
+
+def _filter_header(s):
+ """Clean up 'L' in npz header ints.
+
+ Cleans up the 'L' in strings representing integers. Needed to allow npz
+ headers produced in Python2 to be read in Python3.
+
+ Parameters
+ ----------
+ s : byte string
+ Npy file header.
+
+ Returns
+ -------
+ header : str
+ Cleaned up header.
+
+ """
+ import tokenize
+ if sys.version_info[0] >= 3:
+ from io import StringIO
+ else:
+ from StringIO import StringIO
+
+ tokens = []
+ last_token_was_number = False
+ for token in tokenize.generate_tokens(StringIO(asstr(s)).readline):
+ token_type = token[0]
+ token_string = token[1]
+ if (last_token_was_number and
+ token_type == tokenize.NAME and
+ token_string == "L"):
+ continue
+ else:
+ tokens.append(token)
+ last_token_was_number = (token_type == tokenize.NUMBER)
+ return tokenize.untokenize(tokens)
+
+
def _read_array_header(fp, version):
"""
see read_array_header_1_0
@@ -434,6 +473,7 @@ def _read_array_header(fp, version):
# "shape" : tuple of int
# "fortran_order" : bool
# "descr" : dtype.descr
+ header = _filter_header(header)
try:
d = safe_eval(header)
except SyntaxError as e:
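A usage sketch of the new helper (illustration only; _filter_header is a private function, and the header bytes below are made up):

    import ast
    from numpy.lib.format import _filter_header

    # A Python2-style header round-trips to something Python3 can parse.
    s = b"{'descr': '<i8', 'fortran_order': False, 'shape': (10L, 3L), }"
    cleaned = _filter_header(s)                  # 'L' suffixes are stripped
    print(ast.literal_eval(cleaned)['shape'])    # -> (10, 3)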