summaryrefslogtreecommitdiff
path: root/numpy
diff options
context:
space:
mode:
author pierregm <pierregm@localhost> 2009-10-06 03:47:07 +0000
committer pierregm <pierregm@localhost> 2009-10-06 03:47:07 +0000
commit 6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9 (patch)
tree c5ef08d00870e076b37298cf37c9bb4ca868d5fb /numpy
parent 9a41e774079340e1a8887d9f2310c2458580ec6c (diff)
download numpy-6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9.tar.gz
* _iotools.StringConverter
    - use '1' instead of '0' to test the update
    - add `iterupgrade` to upgrade from an iterator
* io.genfromtxt (bug #1212)
    - use `iterupgrade` to upgrade the converters, and reprocess if there's a problem to catch the offending line
Diffstat (limited to 'numpy')
-rw-r--r-- numpy/lib/_iotools.py 37
-rw-r--r-- numpy/lib/io.py 40
-rw-r--r-- numpy/lib/tests/test_io.py 25
3 files changed, 88 insertions, 14 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index c06275d1b..02385305b 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -370,6 +370,12 @@ def str2bool(value):
raise ValueError("Invalid boolean")
+class ConverterError(Exception):
+ pass
+
+class ConverterLockError(ConverterError):
+ pass
+
class StringConverter:
"""
@@ -574,16 +580,39 @@ class StringConverter:
except ValueError:
# Raise an exception if we locked the converter...
if self._locked:
- raise ValueError("Converter is locked and cannot be upgraded")
+ errmsg = "Converter is locked and cannot be upgraded"
+ raise ConverterLockError(errmsg)
_statusmax = len(self._mapper)
# Complains if we try to upgrade by the maximum
if self._status == _statusmax:
- raise ValueError("Could not find a valid conversion function")
+ errmsg = "Could not find a valid conversion function"
+ raise ConverterError(errmsg)
elif self._status < _statusmax - 1:
self._status += 1
(self.type, self.func, self.default) = self._mapper[self._status]
self.upgrade(value)
- #
+
+ def iterupgrade(self, value):
+ self._checked = True
+ if not hasattr(value, '__iter__'):
+ value = (value,)
+ _strict_call = self._strict_call
+ try:
+ map(_strict_call, value)
+ except ValueError:
+ # Raise an exception if we locked the converter...
+ if self._locked:
+ errmsg = "Converter is locked and cannot be upgraded"
+ raise ConverterLockError(errmsg)
+ _statusmax = len(self._mapper)
+ # Complains if we try to upgrade by the maximum
+ if self._status == _statusmax:
+ raise ConverterError("Could not find a valid conversion function")
+ elif self._status < _statusmax - 1:
+ self._status += 1
+ (self.type, self.func, self.default) = self._mapper[self._status]
+ self.iterupgrade(value)
+
def update(self, func, default=None, missing_values='', locked=False):
"""
Set StringConverter attributes directly.
@@ -617,7 +646,7 @@ class StringConverter:
self.type = self._getsubdtype(default)
else:
try:
- tester = func('0')
+ tester = func('1')
except (TypeError, ValueError):
tester = None
self.type = self._getsubdtype(tester)
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index ba804e43f..255c5a7f5 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -12,12 +12,14 @@ import cStringIO
import os
import itertools
import warnings
+from operator import itemgetter
from cPickle import load as _cload, loads
from _datasource import DataSource
from _compiled_base import packbits, unpackbits
from _iotools import LineSplitter, NameValidator, StringConverter, \
+ ConverterError, ConverterLockError, \
_is_string_like, has_nested_fields, flatten_dtype
_file = file
@@ -1176,16 +1178,29 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
elif nbvalues != nbcols:
append_to_invalid((i, nbvalues))
continue
- # Check whether we need to update the converter
- if dtype is None:
- for (converter, item) in zip(converters, values):
- converter.upgrade(item)
# Store the values
append_to_rows(tuple(values))
if usemask:
append_to_masks(tuple([val.strip() in mss
for (val, mss) in zip(values, missing)]))
+ # Upgrade the converters (if needed)
+ if dtype is None:
+ for (i, converter) in enumerate(converters):
+ current_column = map(itemgetter(i), rows)
+ try:
+ converter.iterupgrade(current_column)
+ except ConverterLockError:
+ errmsg = "Converter #%i is locked and cannot be upgraded: " % i
+ current_column = itertools.imap(itemgetter(i), rows)
+ for (j, value) in enumerate(current_column):
+ try:
+ converter.upgrade(value)
+ except (ConverterError, ValueError):
+ errmsg += "(occurred line #%i for value '%s')"
+ errmsg %= (j + 1 + skiprows, value)
+ raise ConverterError(errmsg)
+
# Check that we don't have invalid values
if len(invalid) > 0:
# Construct the error message
@@ -1202,14 +1217,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
# Convert each value according to the converter:
# We want to modify the list in place to avoid creating a new one...
+# if loose:
+# conversionfuncs = [conv._loose_call for conv in converters]
+# else:
+# conversionfuncs = [conv._strict_call for conv in converters]
+# for (i, vals) in enumerate(rows):
+# rows[i] = tuple([convert(val)
+# for (convert, val) in zip(conversionfuncs, vals)])
if loose:
- conversionfuncs = [conv._loose_call for conv in converters]
+ rows = zip(*(map(converter._loose_call, map(itemgetter(i), rows))
+ for (i, converter) in enumerate(converters)))
else:
- conversionfuncs = [conv._strict_call for conv in converters]
- for (i, vals) in enumerate(rows):
- rows[i] = tuple([convert(val)
- for (convert, val) in zip(conversionfuncs, vals)])
-
+ rows = zip(*(map(converter._strict_call, map(itemgetter(i), rows))
+ for (i, converter) in enumerate(converters)))
# Reset the dtype
data = rows
if dtype is None:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index ecf87980b..bc05ed4d3 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -11,6 +11,8 @@ from tempfile import mkstemp, NamedTemporaryFile
import sys, time
from datetime import datetime
+from numpy.lib._iotools import ConverterError, ConverterLockError
+
MAJVER, MINVER = sys.version_info[:2]
@@ -624,6 +626,19 @@ M 33 21.99
assert_equal(test, [33, 66])
+ def test_invalid_converter(self):
+ strip_rand = lambda x : float(('r' in x.lower() and x.split()[-1]) or
+ (not 'r' in x.lower() and x.strip() or 0.0))
+ strip_per = lambda x : float(('%' in x.lower() and x.split()[0]) or
+ (not '%' in x.lower() and x.strip() or 0.0))
+ s = StringIO.StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" \
+ "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
+ "D02N03,10/10/2004,R 1,,7,145.55")
+ kwargs = dict(converters={2 : strip_per, 3 : strip_rand}, delimiter=",",
+ dtype=None)
+ assert_raises(ConverterError, np.genfromtxt, s, **kwargs)
+
+
def test_dtype_with_converters(self):
dstr = "2009; 23; 46"
test = np.ndfromtxt(StringIO.StringIO(dstr,),
@@ -863,6 +878,16 @@ M 33 21.99
assert_equal(mtest, control)
+ def test_inconsistent_dtype(self):
+ data = ["1, 1, 1, 1, -1.1"] * 50
+ mdata = StringIO.StringIO("\n".join(data))
+
+ converters = {4: lambda x:np.sqrt(float(x))}
+ kwargs = dict(delimiter=",", converters=converters,
+ dtype=[(_, int) for _ in 'abcde'],)
+ assert_raises(TypeError, np.genfromtxt, mdata, **kwargs)
+
+
def test_recfromtxt(self):
#
data = StringIO.StringIO('A,B\n0,1\n2,3')