summary | refs | log | tree | commit | diff
path: root/numpy
diff options
context:
space:
mode:
author Pauli Virtanen <pav@iki.fi> 2010-02-20 18:18:18 +0000
committer Pauli Virtanen <pav@iki.fi> 2010-02-20 18:18:18 +0000
commit 9c77c439698e34656d21f5e13bdf12210f659735 (patch)
tree 0a73fe08e4c31ddf9fc066c0b95486412915b097 /numpy
parent fe8b7034708ffdf0d2efec268c9852162da56078 (diff)
download numpy-9c77c439698e34656d21f5e13bdf12210f659735.tar.gz
3K: lib: more str vs bytes issues in the lib/io loadtxt, savetxt and genfromtxt
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/compat/py3k.py 2
-rw-r--r-- numpy/lib/_iotools.py 11
-rw-r--r-- numpy/lib/io.py 60
-rw-r--r-- numpy/lib/tests/test__iotools.py 27
-rw-r--r-- numpy/lib/tests/test_io.py 71
5 files changed, 104 insertions, 67 deletions
diff --git a/numpy/compat/py3k.py b/numpy/compat/py3k.py
index 7af73c3d0..7357bacff 100644
--- a/numpy/compat/py3k.py
+++ b/numpy/compat/py3k.py
@@ -21,7 +21,7 @@ if sys.version_info[0] >= 3:
def asstr(s):
if isinstance(s, str):
return s
- return bytes.decode('latin1')
+ return s.decode('latin1')
def isfileobj(f):
return isinstance(f, io.FileIO)
strchar = 'U'
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index a19852ac6..5eb4c0005 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -6,7 +6,7 @@ import numpy as np
import numpy.core.numeric as nx
from __builtin__ import bool, int, long, float, complex, object, unicode, str
-from numpy.compat import asbytes, bytes
+from numpy.compat import asbytes, bytes, asbytes_nested
if sys.version_info[0] >= 3:
def _bytes_to_complex(s):
@@ -542,6 +542,11 @@ class StringConverter:
#
def __init__(self, dtype_or_func=None, default=None, missing_values=None,
locked=False):
+ # Convert unicode (for Py3)
+ if isinstance(missing_values, unicode):
+ missing_values = asbytes(missing_values)
+ elif isinstance(missing_values, (list, tuple)):
+ missing_values = asbytes_nested(missing_values)
# Defines a lock for upgrade
self._locked = bool(locked)
# No input dtype: minimal initialization
@@ -566,7 +571,7 @@ class StringConverter:
# If we don't have a default, try to guess it or set it to None
if default is None:
try:
- default = self.func('0')
+ default = self.func(asbytes('0'))
except ValueError:
default = None
ttype = self._getsubdtype(default)
@@ -729,7 +734,7 @@ class StringConverter:
self.type = self._getsubdtype(default)
else:
try:
- tester = func('1')
+ tester = func(asbytes('1'))
except (TypeError, ValueError):
tester = None
self.type = self._getsubdtype(tester)
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index f57231c56..8233fc7a0 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -8,7 +8,7 @@ __all__ = ['savetxt', 'loadtxt',
import numpy as np
import format
-import cStringIO
+import sys
import os
import sys
import itertools
@@ -24,7 +24,13 @@ from _iotools import LineSplitter, NameValidator, StringConverter, \
_is_string_like, has_nested_fields, flatten_dtype, \
easy_dtype, _bytes_to_name
-from numpy.compat import asbytes, asstr
+from numpy.compat import asbytes, asstr, asbytes_nested
+
+if sys.version_info[0] >= 3:
+ import io
+ BytesIO = io.BytesIO
+else:
+ from cStringIO import StringIO as BytesIO
_file = open
_string_like = _is_string_like
@@ -34,7 +40,7 @@ def seek_gzip_factory(f):
import on gzip.
"""
- import gzip, new
+ import gzip
def seek(self, offset, whence=0):
# figure out new position (we can only seek forwards)
@@ -58,8 +64,14 @@ def seek_gzip_factory(f):
if isinstance(f, str):
f = gzip.GzipFile(f)
- f.seek = new.instancemethod(seek, f)
- f.tell = new.instancemethod(tell, f)
+ if sys.version_info[0] >= 3:
+ import types
+ f.seek = types.MethodType(seek, f)
+ f.tell = types.MethodType(tell, f)
+ else:
+ import new
+ f.seek = new.instancemethod(seek, f)
+ f.tell = new.instancemethod(tell, f)
return f
@@ -180,7 +192,7 @@ class NpzFile(object):
if member:
bytes = self.zip.read(key)
if bytes.startswith(format.MAGIC_PREFIX):
- value = cStringIO.StringIO(bytes)
+ value = BytesIO(bytes)
return format.read_array(value)
else:
return bytes
@@ -474,12 +486,14 @@ def _getconv(dtype):
return float
elif issubclass(typ, np.complex):
return complex
+ elif issubclass(typ, np.bytes_):
+ return bytes
else:
return str
-def loadtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
+def loadtxt(fname, dtype=float, comments='#', delimiter=None,
converters=None, skiprows=0, usecols=None, unpack=False):
"""
Load data from a text file.
@@ -555,6 +569,11 @@ def loadtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
array([ 2., 4.])
"""
+ # Type conversions for Py3 convenience
+ comments = asbytes(comments)
+ if delimiter is not None:
+ delimiter = asbytes(delimiter)
+
user_converters = converters
if usecols is not None:
@@ -768,9 +787,9 @@ def savetxt(fname, X, fmt='%.18e', delimiter=' ', newline='\n'):
"""
# Py3 conversions first
- if isinstance(format, bytes):
- format = asstr(format)
- delimiter = asbytes(delimiter)
+ if isinstance(fmt, bytes):
+ fmt = asstr(fmt)
+ delimiter = asstr(delimiter)
if _is_string_like(fname):
if fname.endswith('.gz'):
@@ -877,9 +896,9 @@ def fromregex(file, regexp, dtype):
"""
if not hasattr(file, "read"):
- file = open(file, 'r')
+ file = open(file, 'rb')
if not hasattr(regexp, 'match'):
- regexp = re.compile(regexp)
+ regexp = re.compile(asbytes(regexp))
if not isinstance(dtype, np.dtype):
dtype = np.dtype(dtype)
@@ -905,9 +924,9 @@ def fromregex(file, regexp, dtype):
-def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
+def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
skiprows=0, skip_header=0, skip_footer=0, converters=None,
- missing=asbytes(''), missing_values=None, filling_values=None,
+ missing='', missing_values=None, filling_values=None,
usecols=None, names=None, excludelist=None, deletechars=None,
autostrip=False, case_sensitive=True, defaultfmt="f%i",
unpack=None, usemask=False, loose=True, invalid_raise=True):
@@ -1042,6 +1061,15 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
"""
+ # Py3 data conversions to bytes, for convenience
+ comments = asbytes(comments)
+ if isinstance(delimiter, unicode):
+ delimiter = asbytes(delimiter)
+ if isinstance(missing, unicode):
+ missing = asbytes(missing)
+ if isinstance(missing_values, (unicode, list, tuple)):
+ missing_values = asbytes_nested(missing_values)
+
#
if usemask:
from numpy.ma import MaskedArray, make_mask_descr
@@ -1182,7 +1210,7 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
entry.append(value)
# We have a string : apply it to all entries
elif isinstance(user_missing_values, basestring):
- user_value = user_missing_values.split(",")
+ user_value = user_missing_values.split(asbytes(","))
for entry in missing_values:
entry.extend(user_value)
# We have something else: apply it to all entries
@@ -1195,7 +1223,7 @@ def genfromtxt(fname, dtype=float, comments=asbytes('#'), delimiter=None,
warnings.warn("The use of `missing` is deprecated.\n"\
"Please use `missing_values` instead.",
DeprecationWarning)
- values = [str(_) for _ in missing.split(",")]
+ values = [str(_) for _ in missing.split(asbytes(","))]
for entry in missing_values:
entry.extend(values)
diff --git a/numpy/lib/tests/test__iotools.py b/numpy/lib/tests/test__iotools.py
index d105cf835..7c45b3527 100644
--- a/numpy/lib/tests/test__iotools.py
+++ b/numpy/lib/tests/test__iotools.py
@@ -6,6 +6,9 @@ if sys.version_info[0] >= 3:
else:
from StringIO import StringIO
+from datetime import date
+import time
+
import numpy as np
from numpy.lib._iotools import LineSplitter, NameValidator, StringConverter,\
has_nested_fields, easy_dtype
@@ -130,6 +133,12 @@ class TestNameValidator(TestCase):
#-------------------------------------------------------------------------------
+def _bytes_to_date(s):
+ if sys.version_info[0] >= 3:
+ return date(*time.strptime(s.decode('latin1'), "%Y-%m-%d")[:3])
+ else:
+ return date(*time.strptime(s, "%Y-%m-%d")[:3])
+
class TestStringConverter(TestCase):
"Test StringConverter"
#
@@ -168,27 +177,19 @@ class TestStringConverter(TestCase):
#
def test_upgrademapper(self):
"Tests updatemapper"
- from datetime import date
- import time
- if sys.version_info[0] >= 3:
- dateparser = lambda s : date(*time.strptime(s.decode('latin1'),
- "%Y-%m-%d")[:3])
- else:
- dateparser = lambda s : date(*time.strptime(s, "%Y-%m-%d")[:3])
+ dateparser = _bytes_to_date
StringConverter.upgrade_mapper(dateparser, date(2000,1,1))
convert = StringConverter(dateparser, date(2000, 1, 1))
- test = convert('2001-01-01')
+ test = convert(asbytes('2001-01-01'))
assert_equal(test, date(2001, 01, 01))
- test = convert('2009-01-01')
+ test = convert(asbytes('2009-01-01'))
assert_equal(test, date(2009, 01, 01))
- test = convert('')
+ test = convert(asbytes(''))
assert_equal(test, date(2000, 01, 01))
#
def test_string_to_object(self):
"Make sure that string-to-object functions are properly recognized"
- from datetime import date
- import time
- conv = StringConverter(lambda s: date(*(time.strptime(s)[:3])))
+ conv = StringConverter(_bytes_to_date)
assert_equal(conv._mapper[-2][0](0), 0j)
assert(hasattr(conv, 'default'))
#
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index dd1bfbad8..2b4d542c7 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -15,7 +15,7 @@ from datetime import datetime
from numpy.lib._iotools import ConverterError, ConverterLockError, \
ConversionWarning
-from numpy.compat import asbytes
+from numpy.compat import asbytes, asbytes_nested
if sys.version_info[0] >= 3:
from io import BytesIO
@@ -31,7 +31,10 @@ def strptime(s, fmt=None):
from Python >= 2.5.
"""
- return datetime(*time.strptime(s, fmt)[:3])
+ if sys.version_info[0] >= 3:
+ return datetime(*time.strptime(s.decode('latin1'), fmt)[:3])
+ else:
+ return datetime(*time.strptime(s, fmt)[:3])
class RoundtripTest(object):
def roundtrip(self, save_func, *args, **kwargs):
@@ -175,7 +178,7 @@ class TestSaveTxt(TestCase):
c = StringIO()
np.savetxt(c, a, fmt='%d')
c.seek(0)
- assert_equal(c.readlines(), ['1 2\n', '3 4\n'])
+ assert_equal(c.readlines(), asbytes_nested(['1 2\n', '3 4\n']))
def test_1D(self):
a = np.array([1, 2, 3, 4], int)
@@ -190,7 +193,7 @@ class TestSaveTxt(TestCase):
c = StringIO()
np.savetxt(c, a, fmt='%d')
c.seek(0)
- assert_equal(c.readlines(), ['1 2\n', '3 4\n'])
+ assert_equal(c.readlines(), asbytes_nested(['1 2\n', '3 4\n']))
def test_delimiter(self):
a = np.array([[1., 2.], [3., 4.]])
@@ -205,34 +208,34 @@ class TestSaveTxt(TestCase):
# Sequence of formats
np.savetxt(c, a, fmt=['%02d', '%3.1f'])
c.seek(0)
- assert_equal(c.readlines(), ['01 2.0\n', '03 4.0\n'])
+ assert_equal(c.readlines(), asbytes_nested(['01 2.0\n', '03 4.0\n']))
# A single multiformat string
c = StringIO()
np.savetxt(c, a, fmt='%02d : %3.1f')
c.seek(0)
lines = c.readlines()
- assert_equal(lines, ['01 : 2.0\n', '03 : 4.0\n'])
+ assert_equal(lines, asbytes_nested(['01 : 2.0\n', '03 : 4.0\n']))
# Specify delimiter, should be overiden
c = StringIO()
np.savetxt(c, a, fmt='%02d : %3.1f', delimiter=',')
c.seek(0)
lines = c.readlines()
- assert_equal(lines, ['01 : 2.0\n', '03 : 4.0\n'])
+ assert_equal(lines, asbytes_nested(['01 : 2.0\n', '03 : 4.0\n']))
class TestLoadTxt(TestCase):
def test_record(self):
c = StringIO()
- c.write('1 2\n3 4')
+ c.write(asbytes('1 2\n3 4'))
c.seek(0)
x = np.loadtxt(c, dtype=[('x', np.int32), ('y', np.int32)])
a = np.array([(1, 2), (3, 4)], dtype=[('x', 'i4'), ('y', 'i4')])
assert_array_equal(x, a)
d = StringIO()
- d.write('M 64.0 75.0\nF 25.0 60.0')
+ d.write(asbytes('M 64.0 75.0\nF 25.0 60.0'))
d.seek(0)
mydescriptor = {'names': ('gender', 'age', 'weight'),
'formats': ('S1',
@@ -244,7 +247,7 @@ class TestLoadTxt(TestCase):
def test_array(self):
c = StringIO()
- c.write('1 2\n3 4')
+ c.write(asbytes('1 2\n3 4'))
c.seek(0)
x = np.loadtxt(c, dtype=int)
@@ -258,14 +261,14 @@ class TestLoadTxt(TestCase):
def test_1D(self):
c = StringIO()
- c.write('1\n2\n3\n4\n')
+ c.write(asbytes('1\n2\n3\n4\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int)
a = np.array([1, 2, 3, 4], int)
assert_array_equal(x, a)
c = StringIO()
- c.write('1,2,3,4\n')
+ c.write(asbytes('1,2,3,4\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',')
a = np.array([1, 2, 3, 4], int)
@@ -273,7 +276,7 @@ class TestLoadTxt(TestCase):
def test_missing(self):
c = StringIO()
- c.write('1,2,3,,5\n')
+ c.write(asbytes('1,2,3,,5\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',', \
converters={3:lambda s: int(s or - 999)})
@@ -282,7 +285,7 @@ class TestLoadTxt(TestCase):
def test_converters_with_usecols(self):
c = StringIO()
- c.write('1,2,3,,5\n6,7,8,9,10\n')
+ c.write(asbytes('1,2,3,,5\n6,7,8,9,10\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',', \
converters={3:lambda s: int(s or - 999)}, \
@@ -292,7 +295,7 @@ class TestLoadTxt(TestCase):
def test_comments(self):
c = StringIO()
- c.write('# comment\n1,2,3,5\n')
+ c.write(asbytes('# comment\n1,2,3,5\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',', \
comments='#')
@@ -301,7 +304,7 @@ class TestLoadTxt(TestCase):
def test_skiprows(self):
c = StringIO()
- c.write('comment\n1,2,3,5\n')
+ c.write(asbytes('comment\n1,2,3,5\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',', \
skiprows=1)
@@ -309,7 +312,7 @@ class TestLoadTxt(TestCase):
assert_array_equal(x, a)
c = StringIO()
- c.write('# comment\n1,2,3,5\n')
+ c.write(asbytes('# comment\n1,2,3,5\n'))
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',', \
skiprows=1)
@@ -344,12 +347,12 @@ class TestLoadTxt(TestCase):
names = ['stid', 'temp']
dtypes = ['S4', 'f8']
arr = np.loadtxt(c, usecols=(0, 2), dtype=zip(names, dtypes))
- assert_equal(arr['stid'], ["JOE", "BOB"])
+ assert_equal(arr['stid'], asbytes_nested(["JOE", "BOB"]))
assert_equal(arr['temp'], [25.3, 27.9])
def test_fancy_dtype(self):
c = StringIO()
- c.write('1,2,3.0\n4,5,6.0\n')
+ c.write(asbytes('1,2,3.0\n4,5,6.0\n'))
c.seek(0)
dt = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
x = np.loadtxt(c, dtype=dt, delimiter=',')
@@ -371,7 +374,7 @@ class TestLoadTxt(TestCase):
def test_unused_converter(self):
c = StringIO()
- c.writelines(['1 21\n', '3 42\n'])
+ c.writelines([asbytes('1 21\n'), asbytes('3 42\n')])
c.seek(0)
data = np.loadtxt(c, usecols=(1,),
converters={0: lambda s: int(s, 16)})
@@ -404,7 +407,7 @@ class TestLoadTxt(TestCase):
class Testfromregex(TestCase):
def test_record(self):
c = StringIO()
- c.write('1.312 foo\n1.534 bar\n4.444 qux')
+ c.write(asbytes('1.312 foo\n1.534 bar\n4.444 qux'))
c.seek(0)
dt = [('num', np.float64), ('val', 'S3')]
@@ -415,7 +418,7 @@ class Testfromregex(TestCase):
def test_record_2(self):
c = StringIO()
- c.write('1312 foo\n1534 bar\n4444 qux')
+ c.write(asbytes('1312 foo\n1534 bar\n4444 qux'))
c.seek(0)
dt = [('num', np.int32), ('val', 'S3')]
@@ -426,7 +429,7 @@ class Testfromregex(TestCase):
def test_record_3(self):
c = StringIO()
- c.write('1312 foo\n1534 bar\n4444 qux')
+ c.write(asbytes('1312 foo\n1534 bar\n4444 qux'))
c.seek(0)
dt = [('num', np.float64)]
@@ -521,7 +524,7 @@ class TestFromTxt(TestCase):
"Test retrieving a header"
data = StringIO('gender age weight\nM 64.0 75.0\nF 25.0 60.0')
test = np.ndfromtxt(data, dtype=None, names=True)
- control = {'gender': np.array(['M', 'F']),
+ control = {'gender': np.array(asbytes_nested(['M', 'F'])),
'age': np.array([64.0, 25.0]),
'weight': np.array([75.0, 60.0])}
assert_equal(test['gender'], control['gender'])
@@ -532,7 +535,7 @@ class TestFromTxt(TestCase):
"Test the automatic definition of the output dtype"
data = StringIO('A 64 75.0 3+4j True\nBCD 25 60.0 5+6j False')
test = np.ndfromtxt(data, dtype=None)
- control = [np.array(['A', 'BCD']),
+ control = [np.array(asbytes_nested(['A', 'BCD'])),
np.array([64, 25]),
np.array([75.0, 60.0]),
np.array([3 + 4j, 5 + 6j]),
@@ -649,10 +652,10 @@ M 33 21.99
def test_invalid_converter(self):
- strip_rand = lambda x : float(('r' in x.lower() and x.split()[-1]) or
- (not 'r' in x.lower() and x.strip() or 0.0))
- strip_per = lambda x : float(('%' in x.lower() and x.split()[0]) or
- (not '%' in x.lower() and x.strip() or 0.0))
+ strip_rand = lambda x : float((asbytes('r') in x.lower() and x.split()[-1]) or
+ (not asbytes('r') in x.lower() and x.strip() or 0.0))
+ strip_per = lambda x : float((asbytes('%') in x.lower() and x.split()[0]) or
+ (not asbytes('%') in x.lower() and x.strip() or 0.0))
s = StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" \
"L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
"D02N03,10/10/2004,R 1,,7,145.55")
@@ -678,10 +681,10 @@ M 33 21.99
"Test using an explicit dtype with an object"
from datetime import date
import time
- data = """
+ data = asbytes("""
1; 2001-01-01
2; 2002-01-31
- """
+ """)
ndtype = [('idx', int), ('code', np.object)]
func = lambda s: strptime(s.strip(), "%Y-%m-%d")
converters = {1: func}
@@ -775,7 +778,7 @@ M 33 21.99
names = ['stid', 'temp']
dtypes = ['S4', 'f8']
test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes))
- assert_equal(test['stid'], ["JOE", "BOB"])
+ assert_equal(test['stid'], asbytes_nested(["JOE", "BOB"]))
assert_equal(test['temp'], [25.3, 27.9])
def test_usecols_with_integer(self):
@@ -1153,7 +1156,7 @@ def test_gzip_loadtxt():
# which is then read from by the loadtxt function
s = StringIO()
g = gzip.GzipFile(fileobj=s, mode='w')
- g.write('1 2 3\n')
+ g.write(asbytes('1 2 3\n'))
g.close()
s.seek(0)
@@ -1169,7 +1172,7 @@ def test_gzip_loadtxt():
def test_gzip_loadtxt_from_string():
s = StringIO()
f = gzip.GzipFile(fileobj=s, mode="w")
- f.write('1 2 3\n')
+ f.write(asbytes('1 2 3\n'))
f.close()
s.seek(0)