ENH: Add keyword nrows to genfromtxt.

This allows one to specify the maximum number of rows processed in a call. The new functionality allows for reading more complex data formats. For instance, multiple calls can be used to read in multiple arrays stored in a single file. Closes #5084. Closes #5093.
author: styr <styr.py@gmail.com> 2014-09-21 16:19:57 +0800
committer: Charles Harris <charlesr.harris@gmail.com> 2015-01-23 10:34:30 -0700
commit: 0091499ec28cd9ceb30cd94c0e40191570b6fec6 (patch)
tree: babcb37140e6e7730f50032c07674f20da629b10 /numpy
parent: d44604ef625ff38e835a51cad3bcd24400278eff (diff)
download: numpy-0091499ec28cd9ceb30cd94c0e40191570b6fec6.tar.gz
2 files changed, 46 insertions, 1 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index a40de4fea..c8cebaed8 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -1204,7 +1204,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
                usecols=None, names=None,
                excludelist=None, deletechars=None, replace_space='_',
                autostrip=False, case_sensitive=True, defaultfmt="f%i",
-               unpack=None, usemask=False, loose=True, invalid_raise=True):
+               unpack=None, usemask=False, loose=True, invalid_raise=True,
+               nrows=None):
     """
     Load data from a text file, with missing values handled as specified.
 
@@ -1285,6 +1286,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
         If True, an exception is raised if an inconsistency is detected in the
         number of columns.
         If False, a warning is emitted and the offending lines are skipped.
+    nrows : int,  optional
+        The number of rows to read. Must not be used with skip_footer at the
+        same time.
+
+        .. versionadded:: 1.10.0
 
     Returns
     -------
@@ -1353,6 +1359,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
           dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')])
 
     """
+    # Check keywords conflict
+    if skip_footer and (nrows is not None):
+        raise ValueError(
+                "keywords 'skip_footer' and 'nrows' can not be specified "
+                "at the same time")
+
     # Py3 data conversions to bytes, for convenience
     if comments is not None:
         comments = asbytes(comments)
@@ -1642,6 +1654,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
 
     # Parse each line
     for (i, line) in enumerate(itertools.chain([first_line, ], fhd)):
+        if (nrows is not None) and (len(rows) >= nrows):
+            break
         values = split_line(line)
         nbvalues = len(values)
         # Skip an empty line
@@ -1666,6 +1680,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None,
     if own_fhd:
         fhd.close()
 
+    if (nrows is not None) and (len(rows) != nrows):
+        raise AssertionError(
+                "%d rows required but got %d valid rows instead"
+                %(nrows,  len(rows)))
+
     # Upgrade the converters (if needed)
     if dtype is None:
         for (i, converter) in enumerate(converters):
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 81bddfadd..df5ab1a2a 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1641,6 +1641,32 @@ M   33  21.99
         self.assertTrue(isinstance(test, np.recarray))
         assert_equal(test, control)
 
+    def test_nrows(self):
+        # Test that nrows stops reading once that many valid rows are collected
+        data = '1 1\n2 2\n0 \n3 3\n4 4\n5  \n6  \n7  \n'
+        test = np.genfromtxt(TextIO(data),  nrows=2)
+        control = np.array([[1., 1.], [2., 2.]])
+        assert_equal(test,   control)
+        # skip_footer and nrows given together must raise ValueError
+        assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, nrows=4)
+        # A malformed row ('0 ') is met before 4 valid rows are read: ValueError
+        assert_raises(ValueError, np.genfromtxt, TextIO(data), nrows=4)
+        # With invalid_raise=False the malformed row is skipped, not counted
+        with warnings.catch_warnings():
+            warnings.filterwarnings("ignore")
+            test = np.genfromtxt(TextIO(data), nrows=4, invalid_raise=False)
+            control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]])
+            assert_equal(test, control)
+        # Only 4 rows have two columns, so requesting 5 raises AssertionError
+        assert_raises(AssertionError, np.genfromtxt, TextIO(data), nrows=5)
+
+        data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5  \n6  \n7  \n'
+        # skip_header drops 'a b'; names are taken from the comment line '#c d'
+        test = np.genfromtxt(TextIO(data), skip_header=1, nrows=4, names=True)
+        control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0), (4.0, 4.0)],
+                      dtype=[('c', '<f8'), ('d', '<f8')])
+        assert_equal(test, control)
+
     def test_gft_using_filename(self):
         # Test that we can load data from a filename as well as a file object
         wanted = np.arange(6).reshape((2, 3))
author	styr <styr.py@gmail.com>	2014-09-21 16:19:57 +0800
committer	Charles Harris <charlesr.harris@gmail.com>	2015-01-23 10:34:30 -0700
commit	0091499ec28cd9ceb30cd94c0e40191570b6fec6 (patch)
tree	babcb37140e6e7730f50032c07674f20da629b10 /numpy
parent	d44604ef625ff38e835a51cad3bcd24400278eff (diff)
download	numpy-0091499ec28cd9ceb30cd94c0e40191570b6fec6.tar.gz