diff options
author | styr <styr.py@gmail.com> | 2014-09-21 16:19:57 +0800 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2015-01-23 10:34:30 -0700 |
commit | 0091499ec28cd9ceb30cd94c0e40191570b6fec6 (patch) | |
tree | babcb37140e6e7730f50032c07674f20da629b10 /numpy | |
parent | d44604ef625ff38e835a51cad3bcd24400278eff (diff) | |
download | numpy-0091499ec28cd9ceb30cd94c0e40191570b6fec6.tar.gz |
ENH: Add keyword nrows to genfromtxt.
This allows one to specify the maximum number of rows processed in
a call. The new functionality allows for reading more complex
data formats. For instance, multiple calls can be used to read in
multiple arrays stored in a single file.
Closes #5084.
Closes #5093.
Diffstat (limited to 'numpy')
-rw-r--r-- | numpy/lib/npyio.py | 21 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 26 |
2 files changed, 46 insertions, 1 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py index a40de4fea..c8cebaed8 100644 --- a/numpy/lib/npyio.py +++ b/numpy/lib/npyio.py @@ -1204,7 +1204,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, usecols=None, names=None, excludelist=None, deletechars=None, replace_space='_', autostrip=False, case_sensitive=True, defaultfmt="f%i", - unpack=None, usemask=False, loose=True, invalid_raise=True): + unpack=None, usemask=False, loose=True, invalid_raise=True, + nrows=None): """ Load data from a text file, with missing values handled as specified. @@ -1285,6 +1286,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, If True, an exception is raised if an inconsistency is detected in the number of columns. If False, a warning is emitted and the offending lines are skipped. + nrows : int, optional + The number of rows to read. Must not be used with skip_footer at the + same time. + + .. versionadded:: 1.10.0 Returns ------- @@ -1353,6 +1359,12 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, dtype=[('intvar', '<i8'), ('fltvar', '<f8'), ('strvar', '|S5')]) """ + # Check keywords conflict + if skip_footer and (nrows is not None): + raise ValueError( + "keywords 'skip_footer' and 'nrows' can not be specified " + "at the same time") + # Py3 data conversions to bytes, for convenience if comments is not None: comments = asbytes(comments) @@ -1642,6 +1654,8 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, # Parse each line for (i, line) in enumerate(itertools.chain([first_line, ], fhd)): + if (nrows is not None) and (len(rows) >= nrows): + break values = split_line(line) nbvalues = len(values) # Skip an empty line @@ -1666,6 +1680,11 @@ def genfromtxt(fname, dtype=float, comments='#', delimiter=None, if own_fhd: fhd.close() + if (nrows is not None) and (len(rows) != nrows): + raise AssertionError( + "%d rows required but got %d valid rows instead" + %(nrows, len(rows))) + # Upgrade the converters 
(if needed) if dtype is None: for (i, converter) in enumerate(converters): diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py index 81bddfadd..df5ab1a2a 100644 --- a/numpy/lib/tests/test_io.py +++ b/numpy/lib/tests/test_io.py @@ -1641,6 +1641,32 @@ M 33 21.99 self.assertTrue(isinstance(test, np.recarray)) assert_equal(test, control) + def test_nrows(self): + # + data = '1 1\n2 2\n0 \n3 3\n4 4\n5 \n6 \n7 \n' + test = np.genfromtxt(TextIO(data), nrows=2) + control = np.array([[1., 1.], [2., 2.]]) + assert_equal(test, control) + # Test keywords conflict + assert_raises(ValueError, np.genfromtxt, TextIO(data), skip_footer=1, nrows=4) + # Test with invalid value + assert_raises(ValueError, np.genfromtxt, TextIO(data), nrows=4) + # Test with invalid not raise + with warnings.catch_warnings(): + warnings.filterwarnings("ignore") + test = np.genfromtxt(TextIO(data), nrows=4, invalid_raise=False) + control = np.array([[1., 1.], [2., 2.], [3., 3.], [4., 4.]]) + assert_equal(test, control) + # Test without enough valid rows + assert_raises(AssertionError, np.genfromtxt, TextIO(data), nrows=5) + + data = 'a b\n#c d\n1 1\n2 2\n#0 \n3 3\n4 4\n5 \n6 \n7 \n' + # Test with header, names and comments + test = np.genfromtxt(TextIO(data), skip_header=1, nrows=4, names=True) + control = np.array([(1.0, 1.0), (2.0, 2.0), (3.0, 3.0), (4.0, 4.0)], + dtype=[('c', '<f8'), ('d', '<f8')]) + assert_equal(test, control) + def test_gft_using_filename(self): # Test that we can load data from a filename as well as a file object wanted = np.arange(6).reshape((2, 3)) |