Merge pull request #11962 from mfkasim91/limitrowloadtxt

ENH: maximum lines of content to be read from numpy.loadtxt
author: Charles Harris <charlesr.harris@gmail.com> 2018-09-30 11:46:52 -0500
committer: GitHub <noreply@github.com> 2018-09-30 11:46:52 -0500
commit: 87c1fcd0308ee78e743401bac2b0085249cca1e5 (patch)
tree: 49925d18210dc11e0fe16f644d78fc0e0143affa /numpy/lib
parent: 289df942fd10c45d9de410cd246629cd8a428237 (diff)
parent: 4577a69516bcc0406aaaa48304c8a2cbd82c58c9 (diff)
download: numpy-87c1fcd0308ee78e743401bac2b0085249cca1e5.tar.gz
2 files changed, 58 insertions, 2 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 73cf5554a..77e007d23 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -775,7 +775,7 @@ _loadtxt_chunksize = 50000
 
 def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             converters=None, skiprows=0, usecols=None, unpack=False,
-            ndmin=0, encoding='bytes'):
+            ndmin=0, encoding='bytes', max_rows=None):
     """
     Load data from a text file.
 
@@ -837,6 +837,11 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         the system default is used. The default value is 'bytes'.
 
         .. versionadded:: 1.14.0
+    max_rows : int, optional
+        Read at most `max_rows` lines after skipping `skiprows` lines. The
+        default (None) is to read all the lines.
+
+        .. versionadded:: 1.16.0
 
     Returns
     -------
@@ -1018,7 +1023,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
 
         """
         X = []
-        for i, line in enumerate(itertools.chain([first_line], fh)):
+        line_iter = itertools.chain([first_line], fh)
+        line_iter = itertools.islice(line_iter, max_rows)
+        for i, line in enumerate(line_iter):
             vals = split_line(line)
             if len(vals) == 0:
                 continue
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index ef08c3f41..08800ff97 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -1068,6 +1068,55 @@ class TestLoadTxt(LoadTxtBase):
             x = [b'5,6,7,\xc3\x95scarscar', b'15,2,3,hello', b'20,2,3,\xc3\x95scar']
             assert_array_equal(x, np.array(x, dtype="S"))
 
+    def test_max_rows(self):
+        c = TextIO()
+        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       max_rows=1)
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_with_skiprows(self):
+        c = TextIO()
+        c.write('comments\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=1)
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+        c = TextIO()
+        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=2)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_with_read_continuation(self):
+        c = TextIO()
+        c.write('1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       max_rows=2)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8]], int)
+        assert_array_equal(x, a)
+        # test continuation
+        x = np.loadtxt(c, dtype=int, delimiter=',')
+        a = np.array([2,1,4,5], int)
+        assert_array_equal(x, a)
+
+    def test_max_rows_larger(self):
+        # Test that max_rows larger than the number of rows reads all rows.
+        c = TextIO()
+        c.write('comment\n1,2,3,5\n4,5,7,8\n2,1,4,5')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       skiprows=1, max_rows=6)
+        a = np.array([[1, 2, 3, 5], [4, 5, 7, 8], [2, 1, 4, 5]], int)
+        assert_array_equal(x, a)
+
 class Testfromregex(object):
     def test_record(self):
         c = TextIO()
author	Charles Harris <charlesr.harris@gmail.com>	2018-09-30 11:46:52 -0500
committer	GitHub <noreply@github.com>	2018-09-30 11:46:52 -0500
commit	87c1fcd0308ee78e743401bac2b0085249cca1e5 (patch)
tree	49925d18210dc11e0fe16f644d78fc0e0143affa /numpy/lib
parent	289df942fd10c45d9de410cd246629cd8a428237 (diff)
parent	4577a69516bcc0406aaaa48304c8a2cbd82c58c9 (diff)
download	numpy-87c1fcd0308ee78e743401bac2b0085249cca1e5.tar.gz