* _iotools.StringConverter:
  - added a _checked attribute to indicate whether the converter has been upgraded or not.
  - switched the default value for bool to False.
* io.genfromtxt:
  - fixed the case where a whole column is masked: switch to bool or the common dtype (if needed).
author: pierregm <pierregm@localhost> 2009-01-26 21:04:26 +0000
committer: pierregm <pierregm@localhost> 2009-01-26 21:04:26 +0000
commit: 2e346ec1e1000c11f484708e2997b7b95808a00d (patch)
tree: ed452e940d6c9f7e00976604acd887bdfb529d70
parent: 9ac9630d805fd4ec235e009782f019fa8d8e8fa9 (diff)
download: numpy-2e346ec1e1000c11f484708e2997b7b95808a00d.tar.gz
3 files changed, 33 insertions, 9 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 3f3c6655e..595c01d29 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -294,7 +294,7 @@ class StringConverter:
 
     """
     #
-    _mapper = [(nx.bool_, str2bool, None),
+    _mapper = [(nx.bool_, str2bool, False),
                (nx.integer, int, -1),
                (nx.floating, float, nx.nan),
                (complex, complex, nx.nan+0j),
@@ -354,7 +354,7 @@ class StringConverter:
         if dtype_or_func is None:
             self.func = str2bool
             self._status = 0
-            self.default = default
+            self.default = default or False
             ttype = np.bool
         else:
             # Is the input a np.dtype ?
@@ -396,6 +396,7 @@ class StringConverter:
         #
         self._callingfunction = self._strict_call
         self.type = ttype
+        self._checked = False
     #
     def _loose_call(self, value):
         try:
@@ -408,6 +409,8 @@ class StringConverter:
             return self.func(value)
         except ValueError:
             if value.strip() in self.missing_values:
+                if not self._status:
+                    self._checked = False
                 return self.default
             raise ValueError("Cannot convert string '%s'" % value)
     #
@@ -421,6 +424,7 @@ class StringConverter:
     The order in which the converters are tested is read from the
     :attr:`_status` attribute of the instance.
         """
+        self._checked = True
         try:
             self._strict_call(value)
         except ValueError:
diff --git a/numpy/lib/io.py b/numpy/lib/io.py
index 5a3a077cb..2967ba17d 100644
--- a/numpy/lib/io.py
+++ b/numpy/lib/io.py
@@ -883,18 +883,21 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, skiprows=0,
     # Reset the dtype
     data = rows
     if dtype is None:
-        # Get the dtypes from the first row
-        coldtypes = [np.array(val).dtype for val in data[0]]
-        # Find the columns with strings, and take the largest number of chars.
-        strcolidx = [i for (i, v) in enumerate(coldtypes) if v.char == 'S']
+        # Get the dtypes from the types of the converters
+        coldtypes = [conv.type for conv in converters]
+        # Find the columns with strings...
+        strcolidx = [i for (i, v) in enumerate(coldtypes)
+                     if v in (type('S'), np.string_)]
+        # ... and take the largest number of chars.
         for i in strcolidx:
             coldtypes[i] = "|S%i" % max(len(row[i]) for row in data)
         #
         if names is None:
             # If the dtype is uniform, don't define names, else use ''
-            base = coldtypes[0]
-            if np.all([(dt == base) for dt in coldtypes]):
-                (ddtype, mdtype) = (base, np.bool)
+            base = set([c.type for c in converters if c._checked])
+            
+            if len(base) == 1:
+                (ddtype, mdtype) = (list(base)[0], np.bool)
             else:
                 ddtype = [('', dt) for dt in coldtypes]
                 mdtype = [('', np.bool) for dt in coldtypes]
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index f0f2a0619..2995d6709 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -685,6 +685,23 @@ M   33  21.99
         assert_equal(test.mask, control.mask)
 
 
+    def test_with_masked_column_uniform(self):
+        "Test masked column"
+        data = StringIO.StringIO('1 2 3\n4 5 6\n')
+        test = np.genfromtxt(data, missing='2,5', dtype=None, usemask=True)
+        control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0],[0, 1, 0]])
+        assert_equal(test, control)
+
+    def test_with_masked_column_various(self):
+        "Test masked column"
+        data = StringIO.StringIO('True 2 3\nFalse 5 6\n')
+        test = np.genfromtxt(data, missing='2,5', dtype=None, usemask=True)
+        control = ma.array([(1, 2, 3), (0, 5, 6)],
+                           mask=[(0, 1, 0),(0, 1, 0)],
+                           dtype=[('f0', bool), ('f1', bool), ('f2', int)])
+        assert_equal(test, control)
+
+
     def test_recfromtxt(self):
         #
         data = StringIO.StringIO('A,B\n0,1\n2,3')
author	pierregm <pierregm@localhost>	2009-01-26 21:04:26 +0000
committer	pierregm <pierregm@localhost>	2009-01-26 21:04:26 +0000
commit	2e346ec1e1000c11f484708e2997b7b95808a00d (patch)
tree	ed452e940d6c9f7e00976604acd887bdfb529d70
parent	9ac9630d805fd4ec235e009782f019fa8d8e8fa9 (diff)
download	numpy-2e346ec1e1000c11f484708e2997b7b95808a00d.tar.gz