diff options
author | Charles Harris <charlesr.harris@gmail.com> | 2015-04-23 12:41:47 -0400 |
---|---|---|
committer | Charles Harris <charlesr.harris@gmail.com> | 2015-04-23 12:41:47 -0400 |
commit | ea1036cc11d74d7f2a795723f12ab28456e5ab62 (patch) | |
tree | 8df6dc2e3a7f23cf3f489bedeca08053bc3705c1 /numpy/lib | |
parent | e9d04e97167b6aa360a2fc40da721430c4ce101b (diff) | |
parent | 36dbfa5dfd62c559dfdc4aa49bb0192df8a33abd (diff) | |
download | numpy-ea1036cc11d74d7f2a795723f12ab28456e5ab62.tar.gz |
Merge pull request #4612 from Nodd/loadtxt_comments
ENH: Multiple comment tokens in loadtxt
Diffstat (limited to 'numpy/lib')
-rw-r--r-- | numpy/lib/npyio.py | 22 | ||||
-rw-r--r-- | numpy/lib/tests/test_io.py | 40 |
2 files changed, 52 insertions, 10 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index bab30355c..ec89397a0 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -745,8 +745,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         each row will be interpreted as an element of the array. In this
         case, the number of columns used must match the number of fields in
         the data-type.
-    comments : str, optional
-        The character used to indicate the start of a comment;
+    comments : str or sequence, optional
+        The characters or list of characters used to indicate the start of a
+        comment;
         default: '#'.
     delimiter : str, optional
         The string used to separate values. By default, this is any
@@ -819,7 +820,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     """
     # Type conversions for Py3 convenience
     if comments is not None:
-        comments = asbytes(comments)
+        if isinstance(comments, (basestring, bytes)):
+            comments = [asbytes(comments)]
+        else:
+            comments = [asbytes(comment) for comment in comments]
+
+        # Compile regex for comments beforehand
+        comments = (re.escape(comment) for comment in comments)
+        regex_comments = re.compile(asbytes('|').join(comments))
     user_converters = converters
     if delimiter is not None:
         delimiter = asbytes(delimiter)
@@ -897,10 +905,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         returns bytes.
 
         """
-        if comments is None:
-            line = asbytes(line).strip(asbytes('\r\n'))
-        else:
-            line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
+        line = asbytes(line)
+        if comments is not None:
+            line = regex_comments.split(asbytes(line), maxsplit=1)[0]
+        line = line.strip(asbytes('\r\n'))
         if line:
             return line.split(delimiter)
         else:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 8437be14f..8a939f85e 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -15,7 +15,7 @@ import numpy as np
 import numpy.ma as ma
 from numpy.lib._iotools import (ConverterError, ConverterLockError,
                                 ConversionWarning)
-from numpy.compat import asbytes, asbytes_nested, bytes, asstr
+from numpy.compat import asbytes, bytes, unicode
 from nose import SkipTest
 from numpy.ma.testutils import (
     TestCase, assert_equal, assert_array_equal, assert_allclose,
@@ -553,15 +553,49 @@ class TestLoadTxt(TestCase):
         a = np.array([[2, -999], [7, 9]], int)
         assert_array_equal(x, a)
 
-    def test_comments(self):
+    def test_comments_unicode(self):
         c = TextIO()
         c.write('# comment\n1,2,3,5\n')
         c.seek(0)
         x = np.loadtxt(c, dtype=int, delimiter=',',
-                       comments='#')
+                       comments=unicode('#'))
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_comments_byte(self):
+        c = TextIO()
+        c.write('# comment\n1,2,3,5\n')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments=b'#')
+        a = np.array([1, 2, 3, 5], int)
+        assert_array_equal(x, a)
+
+    def test_comments_multiple(self):
+        c = TextIO()
+        c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments=['#', '@', '//'])
+        a = np.array([[1, 2, 3], [4, 5, 6]], int)
+        assert_array_equal(x, a)
+
+    def test_comments_multi_chars(self):
+        c = TextIO()
+        c.write('/* comment\n1,2,3,5\n')
+        c.seek(0)
+        x = np.loadtxt(c, dtype=int, delimiter=',',
+                       comments='/*')
         a = np.array([1, 2, 3, 5], int)
         assert_array_equal(x, a)
 
+        # Check that '/*' is not transformed to ['/', '*']
+        c = TextIO()
+        c.write('*/ comment\n1,2,3,5\n')
+        c.seek(0)
+        assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
+                      comments='/*')
+
     def test_skiprows(self):
         c = TextIO()
         c.write('comment\n1,2,3,5\n')