* _iotools.StringConverter

- use '1' instead of '0' to test the update
- add `iterupgrade` to upgrade from an iterator

* io.genfromtxt (bug #1212)

- use `iterupgrade` to upgrade the converters, and reprocess if there's a problem to catch the offending line
author: pierregm <pierregm@localhost> 2009-10-06 03:47:07 +0000
committer: pierregm <pierregm@localhost> 2009-10-06 03:47:07 +0000
commit: 6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9 (patch)
tree: c5ef08d00870e076b37298cf37c9bb4ca868d5fb /numpy
parent: 9a41e774079340e1a8887d9f2310c2458580ec6c (diff)
download: numpy-6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9.tar.gz
3 files changed, 88 insertions, 14 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index c06275d1b..02385305b 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -370,6 +370,12 @@ def str2bool(value):
         raise ValueError("Invalid boolean")
 
 
+class ConverterError(Exception):
+    pass
+
+class ConverterLockError(ConverterError):
+    pass
+
 
 class StringConverter:
     """
@@ -574,16 +580,39 @@ class StringConverter:
         except ValueError:
             # Raise an exception if we locked the converter...
             if self._locked:
-                raise ValueError("Converter is locked and cannot be upgraded")
+                errmsg = "Converter is locked and cannot be upgraded"
+                raise ConverterLockError(errmsg)
             _statusmax = len(self._mapper)
             # Complains if we try to upgrade by the maximum
             if self._status == _statusmax:
-                raise ValueError("Could not find a valid conversion function")
+                errmsg = "Could not find a valid conversion function"
+                raise ConverterError(errmsg)
             elif self._status < _statusmax - 1:
                 self._status += 1
             (self.type, self.func, self.default) = self._mapper[self._status]
             self.upgrade(value)
-    #
+
+    def iterupgrade(self, value):
+        self._checked = True
+        if not hasattr(value, '__iter__'):
+            value = (value,)
+        _strict_call = self._strict_call
+        try:
+            map(_strict_call, value)
+        except ValueError:
+            # Raise an exception if we locked the converter...
+            if self._locked:
+                errmsg = "Converter is locked and cannot be upgraded"
+                raise ConverterLockError(errmsg)
+            _statusmax = len(self._mapper)
+            # Complains if we try to upgrade by the maximum
+            if self._status == _statusmax:
+                raise ConverterError("Could not find a valid conversion function")
+            elif self._status < _statusmax - 1:
+                self._status += 1
+            (self.type, self.func, self.default) = self._mapper[self._status]
+            self.iterupgrade(value)
+
     def update(self, func, default=None, missing_values='', locked=False):
         """
         Set StringConverter attributes directly.
@@ -617,7 +646,7 @@ class StringConverter:
             self.type = self._getsubdtype(default)
         else:
             try:
-                tester = func('0')
+                tester = func('1')
             except (TypeError, ValueError):
                 tester = None
             self.type = self._getsubdtype(tester)
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index ba804e43f..255c5a7f5 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -12,12 +12,14 @@ import cStringIO
 import os
 import itertools
 import warnings
+from operator import itemgetter
 
 from cPickle import load as _cload, loads
 from _datasource import DataSource
 from _compiled_base import packbits, unpackbits
 
 from _iotools import LineSplitter, NameValidator, StringConverter, \
+                     ConverterError, ConverterLockError, \
                      _is_string_like, has_nested_fields, flatten_dtype
 
 _file = file
@@ -1176,16 +1178,29 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
         elif nbvalues != nbcols:
             append_to_invalid((i, nbvalues))
             continue
-        # Check whether we need to update the converter
-        if dtype is None:
-            for (converter, item) in zip(converters, values):
-                converter.upgrade(item)
         # Store the values
         append_to_rows(tuple(values))
         if usemask:
             append_to_masks(tuple([val.strip() in mss
                             for (val, mss) in zip(values, missing)]))
 
+    # Upgrade the converters (if needed)
+    if dtype is None:
+        for (i, converter) in enumerate(converters):
+            current_column = map(itemgetter(i), rows)
+            try:
+                converter.iterupgrade(current_column)
+            except ConverterLockError:
+                errmsg = "Converter #%i is locked and cannot be upgraded: " % i
+                current_column = itertools.imap(itemgetter(i), rows)
+                for (j, value) in enumerate(current_column):
+                    try:
+                        converter.upgrade(value)
+                    except (ConverterError, ValueError):
+                        errmsg += "(occurred line #%i for value '%s')"
+                        errmsg %= (j + 1 + skiprows, value)
+                        raise ConverterError(errmsg)
+
     # Check that we don't have invalid values
     if len(invalid) > 0:
         # Construct the error message
@@ -1202,14 +1217,19 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
 
     # Convert each value according to the converter:
     # We want to modify the list in place to avoid creating a new one...
+#    if loose:
+#        conversionfuncs = [conv._loose_call for conv in converters]
+#    else:
+#        conversionfuncs = [conv._strict_call for conv in converters]
+#    for (i, vals) in enumerate(rows):
+#        rows[i] = tuple([convert(val)
+#                         for (convert, val) in zip(conversionfuncs, vals)])
     if loose:
-        conversionfuncs = [conv._loose_call for conv in converters]
+        rows = zip(*(map(converter._loose_call, map(itemgetter(i), rows))
+                     for (i, converter) in enumerate(converters)))
     else:
-        conversionfuncs = [conv._strict_call for conv in converters]
-    for (i, vals) in enumerate(rows):
-        rows[i] = tuple([convert(val)
-                         for (convert, val) in zip(conversionfuncs, vals)])
-
+        rows = zip(*(map(converter._strict_call, map(itemgetter(i), rows))
+                     for (i, converter) in enumerate(converters)))
     # Reset the dtype
     data = rows
     if dtype is None:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index ecf87980b..bc05ed4d3 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -11,6 +11,8 @@ from tempfile import mkstemp, NamedTemporaryFile
 import sys, time
 from datetime import datetime
 
+from numpy.lib._iotools import ConverterError, ConverterLockError
+
 
 MAJVER, MINVER = sys.version_info[:2]
 
@@ -624,6 +626,19 @@ M   33  21.99
         assert_equal(test, [33, 66])
 
 
+    def test_invalid_converter(self):
+        strip_rand = lambda x : float(('r' in x.lower() and x.split()[-1]) or
+                                      (not 'r' in x.lower() and x.strip() or 0.0))
+        strip_per = lambda x : float(('%' in x.lower() and x.split()[0]) or
+                                     (not '%' in x.lower() and x.strip() or 0.0))
+        s = StringIO.StringIO("D01N01,10/1/2003 ,1 %,R 75,400,600\r\n" \
+                              "L24U05,12/5/2003, 2 %,1,300, 150.5\r\n"
+                              "D02N03,10/10/2004,R 1,,7,145.55")
+        kwargs = dict(converters={2 : strip_per, 3 : strip_rand}, delimiter=",",
+                      dtype=None)
+        assert_raises(ConverterError, np.genfromtxt, s, **kwargs)
+
+
     def test_dtype_with_converters(self):
         dstr = "2009; 23; 46"
         test = np.ndfromtxt(StringIO.StringIO(dstr,),
@@ -863,6 +878,16 @@ M   33  21.99
         assert_equal(mtest, control)
 
 
+    def test_inconsistent_dtype(self):
+        data = ["1, 1, 1, 1, -1.1"] * 50
+        mdata = StringIO.StringIO("\n".join(data))
+
+        converters = {4: lambda x:np.sqrt(float(x))}
+        kwargs = dict(delimiter=",", converters=converters,
+                      dtype=[(_, int) for _ in 'abcde'],)
+        assert_raises(TypeError, np.genfromtxt, mdata, **kwargs)
+
+
     def test_recfromtxt(self):
         #
         data = StringIO.StringIO('A,B\n0,1\n2,3')
author	pierregm <pierregm@localhost>	2009-10-06 03:47:07 +0000
committer	pierregm <pierregm@localhost>	2009-10-06 03:47:07 +0000
commit	6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9 (patch)
tree	c5ef08d00870e076b37298cf37c9bb4ca868d5fb /numpy
parent	9a41e774079340e1a8887d9f2310c2458580ec6c (diff)
download	numpy-6386708bcd6deaf3f6e5f145fb59c0d4b9af86d9.tar.gz