* _iotools.LineSplitter : prevent the first and/or last empty tab-separated columns from being dropped

author: pierregm <pierregm@localhost> 2009-12-08 03:14:37 +0000
committer: pierregm <pierregm@localhost> 2009-12-08 03:14:37 +0000
commit: 20f3fef417dc951af305f32f42d99174570f5069 (patch)
tree: 8d02c6dbc416b49c45a0acc98d09a33c8197e5fa
parent: 73173cd85eae72e4a22b7b465ea74be0dacd8bc6 (diff)
download: numpy-20f3fef417dc951af305f32f42d99174570f5069.tar.gz
2 files changed, 65 insertions, 54 deletions
diff --git a/numpy/lib/_iotools.py b/numpy/lib/_iotools.py
index 2c062f1b0..1c2fd1b6e 100644
--- a/numpy/lib/_iotools.py
+++ b/numpy/lib/_iotools.py
@@ -166,8 +166,8 @@ class LineSplitter:
         # Delimiter is a list of field widths
         elif hasattr(delimiter, '__iter__'):
             _handyman = self._variablewidth_splitter
-            idx = np.cumsum([0]+list(delimiter))
-            delimiter = [slice(i,j) for (i,j) in zip(idx[:-1], idx[1:])]
+            idx = np.cumsum([0] + list(delimiter))
+            delimiter = [slice(i, j) for (i, j) in zip(idx[:-1], idx[1:])]
         # Delimiter is a single integer
         elif int(delimiter):
             (_handyman, delimiter) = (self._fixedwidth_splitter, int(delimiter))
@@ -180,7 +180,7 @@ class LineSplitter:
             self._handyman = _handyman
     #
     def _delimited_splitter(self, line):
-        line = line.split(self.comments)[0].strip()
+        line = line.split(self.comments)[0].strip(" \r\n")
         if not line:
             return []
         return line.split(self.delimiter)
@@ -190,7 +190,7 @@ class LineSplitter:
         if not line:
             return []
         fixed = self.delimiter
-        slices = [slice(i, i+fixed) for i in range(len(line))[::fixed]]
+        slices = [slice(i, i + fixed) for i in range(len(line))[::fixed]]
         return [line[s] for s in slices]
     #
     def _variablewidth_splitter(self, line):
@@ -255,7 +255,7 @@ class NameValidator:
 
     """
     #
-    defaultexcludelist = ['return','file','print']
+    defaultexcludelist = ['return', 'file', 'print']
     defaultdeletechars = set("""~!@#$%^&*()-=+~\|]}[{';: /?.>,<""")
     #
     def __init__(self, excludelist=None, deletechars=None, case_sensitive=None):
@@ -313,7 +313,7 @@ class NameValidator:
                 return None
             names = []
         if isinstance(names, basestring):
-            names = [names,]
+            names = [names, ]
         if nbfields is not None:
             nbnames = len(names)
             if (nbnames < nbfields):
@@ -468,7 +468,7 @@ class StringConverter:
     _mapper = [(nx.bool_, str2bool, False),
                (nx.integer, int, -1),
                (nx.floating, float, nx.nan),
-               (complex, complex, nx.nan+0j),
+               (complex, complex, nx.nan + 0j),
                (nx.string_, str, '???')]
     (_defaulttype, _defaultfunc, _defaultfill) = zip(*_mapper)
     #
@@ -513,7 +513,7 @@ class StringConverter:
                 default = [None] * len(func)
             else:
                 default = list(default)
-                default.append([None] * (len(func)-len(default)))
+                default.append([None] * (len(func) - len(default)))
             for (fct, dft) in zip(func, default):
                 cls._mapper.insert(-1, (cls._getsubdtype(dft), fct, dft))
     #
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 2e13c54ed..7c99230d1 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -90,7 +90,7 @@ class RoundtripTest(object):
         a = np.array([1, 2, 3, 4], int)
         self.roundtrip(a)
 
-    @np.testing.dec.knownfailureif(sys.platform=='win32', "Fail on Win32")
+    @np.testing.dec.knownfailureif(sys.platform == 'win32', "Fail on Win32")
     def test_mmap(self):
         a = np.array([[1, 2.5], [4, 7.3]])
         self.roundtrip(a, file_on_disk=True, load_kwds={'mmap_mode': 'r'})
@@ -113,7 +113,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
     def test_multiple_arrays(self):
         a = np.array([[1, 2], [3, 4]], float)
         b = np.array([[1 + 2j, 2 + 7j], [3 - 6j, 4 + 12j]], complex)
-        self.roundtrip(a,b)
+        self.roundtrip(a, b)
 
     def test_named_arrays(self):
         a = np.array([[1, 2], [3, 4]], float)
@@ -133,7 +133,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
             fd, tmp = mkstemp(suffix='.npz')
             os.close(fd)
             try:
-                arr = np.random.randn(500,500)
+                arr = np.random.randn(500, 500)
                 try:
                     np.savez(tmp, arr=arr)
                 except OSError, err:
@@ -154,7 +154,7 @@ class TestSavezLoad(RoundtripTest, TestCase):
 
 class TestSaveTxt(TestCase):
     def test_array(self):
-        a =np.array([[1, 2], [3, 4]], float)
+        a = np.array([[1, 2], [3, 4]], float)
         fmt = "%.18e"
         c = StringIO.StringIO()
         np.savetxt(c, a, fmt=fmt)
@@ -163,7 +163,7 @@ class TestSaveTxt(TestCase):
                [(fmt + ' ' + fmt + '\n') % (1, 2),
                 (fmt + ' ' + fmt + '\n') % (3, 4)])
 
-        a =np.array([[1, 2], [3, 4]], int)
+        a = np.array([[1, 2], [3, 4]], int)
         c = StringIO.StringIO()
         np.savetxt(c, a, fmt='%d')
         c.seek(0)
@@ -226,7 +226,7 @@ class TestLoadTxt(TestCase):
         d = StringIO.StringIO()
         d.write('M 64.0 75.0\nF 25.0 60.0')
         d.seek(0)
-        mydescriptor = {'names': ('gender','age','weight'),
+        mydescriptor = {'names': ('gender', 'age', 'weight'),
                         'formats': ('S1',
                                     'i4', 'f4')}
         b = np.array([('M', 64.0, 75.0),
@@ -268,7 +268,7 @@ class TestLoadTxt(TestCase):
         c.write('1,2,3,,5\n')
         c.seek(0)
         x = np.loadtxt(c, dtype=int, delimiter=',', \
-            converters={3:lambda s: int(s or -999)})
+            converters={3:lambda s: int(s or - 999)})
         a = np.array([1, 2, 3, -999, 5], int)
         assert_array_equal(x, a)
 
@@ -277,7 +277,7 @@ class TestLoadTxt(TestCase):
         c.write('1,2,3,,5\n6,7,8,9,10\n')
         c.seek(0)
         x = np.loadtxt(c, dtype=int, delimiter=',', \
-            converters={3:lambda s: int(s or -999)}, \
+            converters={3:lambda s: int(s or - 999)}, \
             usecols=(1, 3,))
         a = np.array([[2, -999], [7, 9]], int)
         assert_array_equal(x, a)
@@ -314,9 +314,9 @@ class TestLoadTxt(TestCase):
         np.savetxt(c, a)
         c.seek(0)
         x = np.loadtxt(c, dtype=float, usecols=(1,))
-        assert_array_equal(x, a[:,1])
+        assert_array_equal(x, a[:, 1])
 
-        a =np.array([[1, 2, 3], [3, 4, 5]], float)
+        a = np.array([[1, 2, 3], [3, 4, 5]], float)
         c = StringIO.StringIO()
         np.savetxt(c, a)
         c.seek(0)
@@ -336,8 +336,8 @@ class TestLoadTxt(TestCase):
         names = ['stid', 'temp']
         dtypes = ['S4', 'f8']
         arr = np.loadtxt(c, usecols=(0, 2), dtype=zip(names, dtypes))
-        assert_equal(arr['stid'], ["JOE",  "BOB"])
-        assert_equal(arr['temp'], [25.3,  27.9])
+        assert_equal(arr['stid'], ["JOE", "BOB"])
+        assert_equal(arr['temp'], [25.3, 27.9])
 
     def test_fancy_dtype(self):
         c = StringIO.StringIO()
@@ -387,7 +387,7 @@ class TestLoadTxt(TestCase):
         converters = {1: func}
         test = np.loadtxt(StringIO.StringIO(data), delimiter=";", dtype=ndtype,
                              converters=converters)
-        control = np.array([(1, datetime(2001,1,1)), (2, datetime(2002,1,31))],
+        control = np.array([(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
                            dtype=ndtype)
         assert_equal(test, control)
 
@@ -442,7 +442,7 @@ class TestFromTxt(TestCase):
         #
         data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
 #        data.seek(0)
-        descriptor = {'names': ('gender','age','weight'),
+        descriptor = {'names': ('gender', 'age', 'weight'),
                       'formats': ('S1', 'i4', 'f4')}
         control = np.array([('M', 64.0, 75.0), ('F', 25.0, 60.0)],
                            dtype=descriptor)
@@ -452,12 +452,12 @@ class TestFromTxt(TestCase):
     def test_array(self):
         "Test outputing a standard ndarray"
         data = StringIO.StringIO('1 2\n3 4')
-        control = np.array([[1,2],[3,4]], dtype=int)
+        control = np.array([[1, 2], [3, 4]], dtype=int)
         test = np.ndfromtxt(data, dtype=int)
         assert_array_equal(test, control)
         #
         data.seek(0)
-        control = np.array([[1,2],[3,4]], dtype=float)
+        control = np.array([[1, 2], [3, 4]], dtype=float)
         test = np.loadtxt(data, dtype=float)
         assert_array_equal(test, control)
 
@@ -527,9 +527,9 @@ class TestFromTxt(TestCase):
         control = [np.array(['A', 'BCD']),
                    np.array([64, 25]),
                    np.array([75.0, 60.0]),
-                   np.array([3+4j, 5+6j]),
-                   np.array([True, False]),]
-        assert_equal(test.dtype.names, ['f0','f1','f2','f3','f4'])
+                   np.array([3 + 4j, 5 + 6j]),
+                   np.array([True, False]), ]
+        assert_equal(test.dtype.names, ['f0', 'f1', 'f2', 'f3', 'f4'])
         for (i, ctrl) in enumerate(control):
             assert_equal(test['f%i' % i], ctrl)
 
@@ -538,7 +538,7 @@ class TestFromTxt(TestCase):
         "Tests whether the output dtype can be uniformized"
         data = StringIO.StringIO('1 2 3 4\n5 6 7 8\n')
         test = np.ndfromtxt(data, dtype=None)
-        control = np.array([[1,2,3,4],[5,6,7,8]])
+        control = np.array([[1, 2, 3, 4], [5, 6, 7, 8]])
         assert_equal(test, control)
 
 
@@ -547,16 +547,16 @@ class TestFromTxt(TestCase):
         data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
         fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
         test = np.ndfromtxt(data, dtype=fancydtype, delimiter=',')
-        control = np.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+        control = np.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype)
         assert_equal(test, control)
 
 
     def test_names_overwrite(self):
         "Test overwriting the names of the dtype"
-        descriptor = {'names': ('g','a','w'),
+        descriptor = {'names': ('g', 'a', 'w'),
                       'formats': ('S1', 'i4', 'f4')}
         data = StringIO.StringIO('M 64.0 75.0\nF 25.0 60.0')
-        names = ('gender','age','weight')
+        names = ('gender', 'age', 'weight')
         test = np.ndfromtxt(data, dtype=descriptor, names=names)
         descriptor['names'] = names
         control = np.array([('M', 64.0, 75.0),
@@ -575,7 +575,7 @@ M   33  21.99
         # The # is part of the first name and should be deleted automatically.
         test = np.genfromtxt(data, names=True, dtype=None)
         ctrl = np.array([('M', 21, 72.1), ('F', 35, 58.33), ('M', 33, 21.99)],
-                  dtype=[('gender','|S1'), ('age', int), ('weight', float)])
+                  dtype=[('gender', '|S1'), ('age', int), ('weight', float)])
         assert_equal(test, ctrl)
         # Ditto, but we should get rid of the first element
         data = StringIO.StringIO("""
@@ -602,9 +602,9 @@ M   33  21.99
         "Test the combination user-defined converters and usecol"
         data = StringIO.StringIO('1,2,3,,5\n6,7,8,9,10\n')
         test = np.ndfromtxt(data, dtype=int, delimiter=',',
-                            converters={3:lambda s: int(s or -999)},
-                            usecols=(1, 3, ))
-        control = np.array([[2,  -999], [7, 9]], int)
+                            converters={3:lambda s: int(s or - 999)},
+                            usecols=(1, 3,))
+        control = np.array([[2, -999], [7, 9]], int)
         assert_equal(test, control)
 
     def test_converters_with_usecols_and_names(self):
@@ -621,8 +621,8 @@ M   33  21.99
         converter = {'date': lambda s: strptime(s, '%Y-%m-%d %H:%M:%SZ')}
         data = StringIO.StringIO('2009-02-03 12:00:00Z, 72214.0')
         test = np.ndfromtxt(data, delimiter=',', dtype=None,
-                            names=['date','stid'], converters=converter)
-        control = np.array((datetime(2009,02,03), 72214.),
+                            names=['date', 'stid'], converters=converter)
+        control = np.array((datetime(2009, 02, 03), 72214.),
                            dtype=[('date', np.object_), ('stid', float)])
         assert_equal(test, control)
 
@@ -658,7 +658,7 @@ M   33  21.99
         test = np.ndfromtxt(StringIO.StringIO(dstr,),
                             delimiter=";", dtype=float, converters={0:str})
         control = np.array([('2009', 23., 46)],
-                           dtype=[('f0','|S4'), ('f1', float), ('f2', float)])
+                           dtype=[('f0', '|S4'), ('f1', float), ('f2', float)])
         assert_equal(test, control)
         test = np.ndfromtxt(StringIO.StringIO(dstr,),
                             delimiter=";", dtype=float, converters={0:float})
@@ -679,7 +679,7 @@ M   33  21.99
         converters = {1: func}
         test = np.genfromtxt(StringIO.StringIO(data), delimiter=";", dtype=ndtype,
                              converters=converters)
-        control = np.array([(1, datetime(2001,1,1)), (2, datetime(2002,1,31))],
+        control = np.array([(1, datetime(2001, 1, 1)), (2, datetime(2002, 1, 31))],
                            dtype=ndtype)
         assert_equal(test, control)
         #
@@ -709,18 +709,29 @@ M   33  21.99
         data = StringIO.StringIO("1  2  3  4   5\n6  7  8  9  10")
         test = np.ndfromtxt(data)
         control = np.array([[ 1., 2., 3., 4., 5.],
-                            [ 6., 7., 8., 9.,10.]])
+                            [ 6., 7., 8., 9., 10.]])
         assert_equal(test, control)
 
 
     def test_missing(self):
         data = StringIO.StringIO('1,2,3,,5\n')
         test = np.ndfromtxt(data, dtype=int, delimiter=',', \
-                            converters={3:lambda s: int(s or -999)})
+                            converters={3:lambda s: int(s or - 999)})
         control = np.array([1, 2, 3, -999, 5], int)
         assert_equal(test, control)
 
 
+    def test_missing_with_tabs(self):
+        "Test w/ a delimiter tab"
+        txt = "1\t2\t3\n\t2\t\n1\t\t3"
+        test = np.genfromtxt(StringIO.StringIO(txt), delimiter="\t",
+                             usemask=True,)
+        ctrl_d = np.array([(1, 2, 3), (np.nan, 2, np.nan), (1, np.nan, 3)],)
+        ctrl_m = np.array([(0, 0, 0), (1, 0, 1), (0, 1, 0)], dtype=bool)
+        assert_equal(test.data, ctrl_d)
+        assert_equal(test.mask, ctrl_m)
+
+
     def test_usecols(self):
         "Test the selection of columns"
         # Select 1 column
@@ -731,7 +742,7 @@ M   33  21.99
         test = np.ndfromtxt(data, dtype=float, usecols=(1,))
         assert_equal(test, control[:, 1])
         #
-        control = np.array( [[1, 2, 3], [3, 4, 5]], float)
+        control = np.array([[1, 2, 3], [3, 4, 5]], float)
         data = StringIO.StringIO()
         np.savetxt(data, control)
         data.seek(0)
@@ -756,8 +767,8 @@ M   33  21.99
         names = ['stid', 'temp']
         dtypes = ['S4', 'f8']
         test = np.ndfromtxt(data, usecols=(0, 2), dtype=zip(names, dtypes))
-        assert_equal(test['stid'],  ["JOE",  "BOB"])
-        assert_equal(test['temp'],  [25.3,  27.9])
+        assert_equal(test['stid'], ["JOE", "BOB"])
+        assert_equal(test['temp'], [25.3, 27.9])
 
     def test_usecols_with_integer(self):
         "Test usecols with an integer"
@@ -787,7 +798,7 @@ M   33  21.99
         data = StringIO.StringIO('1,2,3.0\n4,5,6.0\n')
         fancydtype = np.dtype([('x', int), ('y', [('t', int), ('s', float)])])
         test = np.mafromtxt(data, dtype=fancydtype, delimiter=',')
-        control = ma.array([(1,(2,3.0)),(4,(5,6.0))], dtype=fancydtype)
+        control = ma.array([(1, (2, 3.0)), (4, (5, 6.0))], dtype=fancydtype)
         assert_equal(test, control)
 
 
@@ -827,8 +838,8 @@ M   33  21.99
         #
         test = np.mafromtxt(StringIO.StringIO(data), missing_values="N/A",
                             **basekwargs)
-        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
-                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
+                            (-9, 2.2, -999j), (3, -99, 3j)],
                             mask=[(0, 0, 0), (0, 1, 0), (0, 0, 1), (0, 0, 0)],
                             dtype=mdtype)
         assert_equal(test, control)
@@ -836,8 +847,8 @@ M   33  21.99
         basekwargs['dtype'] = mdtype
         test = np.mafromtxt(StringIO.StringIO(data),
                             missing_values={0:-9, 1:-99, 2:-999j}, **basekwargs)
-        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
-                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
+                            (-9, 2.2, -999j), (3, -99, 3j)],
                             mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
                             dtype=mdtype)
         assert_equal(test, control)
@@ -845,8 +856,8 @@ M   33  21.99
         test = np.mafromtxt(StringIO.StringIO(data),
                             missing_values={0:-9, 'B':-99, 'C':-999j},
                             **basekwargs)
-        control = ma.array([(   0, 0.0,    0j), (1, -999, 1j),
-                            (  -9, 2.2, -999j), (3,  -99, 3j)],
+        control = ma.array([(0, 0.0, 0j), (1, -999, 1j),
+                            (-9, 2.2, -999j), (3, -99, 3j)],
                             mask=[(0, 0, 0), (0, 1, 0), (1, 0, 1), (0, 1, 0)],
                             dtype=mdtype)
         assert_equal(test, control)
@@ -886,7 +897,7 @@ M   33  21.99
         data = StringIO.StringIO('1 2 3\n4 5 6\n')
         test = np.genfromtxt(data, dtype=None,
                              missing_values='2,5', usemask=True)
-        control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0],[0, 1, 0]])
+        control = ma.array([[1, 2, 3], [4, 5, 6]], mask=[[0, 1, 0], [0, 1, 0]])
         assert_equal(test, control)
 
     def test_with_masked_column_various(self):
@@ -895,7 +906,7 @@ M   33  21.99
         test = np.genfromtxt(data, dtype=None,
                              missing_values='2,5', usemask=True)
         control = ma.array([(1, 2, 3), (0, 5, 6)],
-                           mask=[(0, 1, 0),(0, 1, 0)],
+                           mask=[(0, 1, 0), (0, 1, 0)],
                            dtype=[('f0', bool), ('f1', bool), ('f2', int)])
         assert_equal(test, control)
 
@@ -1059,7 +1070,7 @@ M   33  21.99
     def test_filling_values(self):
         "Test missing values"
         data = "1, 2, 3\n1, , 5\n0, 6, \n"
-        kwargs = dict(delimiter=",", dtype=None, filling_values=-999)
+        kwargs = dict(delimiter=",", dtype=None, filling_values= -999)
         ctrl = np.array([[1, 2, 3], [1, -999, 5], [0, 6, -999]], dtype=int)
         test = np.ndfromtxt(StringIO.StringIO(data), **kwargs)
         assert_equal(test, ctrl)
author	pierregm <pierregm@localhost>	2009-12-08 03:14:37 +0000
committer	pierregm <pierregm@localhost>	2009-12-08 03:14:37 +0000
commit	20f3fef417dc951af305f32f42d99174570f5069 (patch)
tree	8d02c6dbc416b49c45a0acc98d09a33c8197e5fa
parent	73173cd85eae72e4a22b7b465ea74be0dacd8bc6 (diff)
download	numpy-20f3fef417dc951af305f32f42d99174570f5069.tar.gz