summary | refs | log | tree | commit | diff
path: root/numpy/lib
diff options
context:
space:
mode:
author: Charles Harris <charlesr.harris@gmail.com> 2015-04-23 12:41:47 -0400
committer: Charles Harris <charlesr.harris@gmail.com> 2015-04-23 12:41:47 -0400
commit: ea1036cc11d74d7f2a795723f12ab28456e5ab62 (patch)
tree: 8df6dc2e3a7f23cf3f489bedeca08053bc3705c1 /numpy/lib
parent: e9d04e97167b6aa360a2fc40da721430c4ce101b (diff)
parent: 36dbfa5dfd62c559dfdc4aa49bb0192df8a33abd (diff)
download: numpy-ea1036cc11d74d7f2a795723f12ab28456e5ab62.tar.gz
Merge pull request #4612 from Nodd/loadtxt_comments
ENH: Multiple comment tokens in loadtxt
Diffstat (limited to 'numpy/lib')
-rw-r--r-- numpy/lib/npyio.py 22
-rw-r--r-- numpy/lib/tests/test_io.py 40
2 files changed, 52 insertions, 10 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index bab30355c..ec89397a0 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -745,8 +745,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
each row will be interpreted as an element of the array. In this
case, the number of columns used must match the number of fields in
the data-type.
- comments : str, optional
- The character used to indicate the start of a comment;
+ comments : str or sequence, optional
+ The characters or list of characters used to indicate the start of a
+ comment;
default: '#'.
delimiter : str, optional
The string used to separate values. By default, this is any
@@ -819,7 +820,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
"""
# Type conversions for Py3 convenience
if comments is not None:
- comments = asbytes(comments)
+ if isinstance(comments, (basestring, bytes)):
+ comments = [asbytes(comments)]
+ else:
+ comments = [asbytes(comment) for comment in comments]
+
+ # Compile regex for comments beforehand
+ comments = (re.escape(comment) for comment in comments)
+ regex_comments = re.compile(asbytes('|').join(comments))
user_converters = converters
if delimiter is not None:
delimiter = asbytes(delimiter)
@@ -897,10 +905,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
returns bytes.
"""
- if comments is None:
- line = asbytes(line).strip(asbytes('\r\n'))
- else:
- line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
+ line = asbytes(line)
+ if comments is not None:
+ line = regex_comments.split(asbytes(line), maxsplit=1)[0]
+ line = line.strip(asbytes('\r\n'))
if line:
return line.split(delimiter)
else:
diff --git a/numpy/lib/tests/test_io.py b/numpy/lib/tests/test_io.py
index 8437be14f..8a939f85e 100644
--- a/numpy/lib/tests/test_io.py
+++ b/numpy/lib/tests/test_io.py
@@ -15,7 +15,7 @@ import numpy as np
import numpy.ma as ma
from numpy.lib._iotools import (ConverterError, ConverterLockError,
ConversionWarning)
-from numpy.compat import asbytes, asbytes_nested, bytes, asstr
+from numpy.compat import asbytes, bytes, unicode
from nose import SkipTest
from numpy.ma.testutils import (
TestCase, assert_equal, assert_array_equal, assert_allclose,
@@ -553,15 +553,49 @@ class TestLoadTxt(TestCase):
a = np.array([[2, -999], [7, 9]], int)
assert_array_equal(x, a)
- def test_comments(self):
+ def test_comments_unicode(self):
c = TextIO()
c.write('# comment\n1,2,3,5\n')
c.seek(0)
x = np.loadtxt(c, dtype=int, delimiter=',',
- comments='#')
+ comments=unicode('#'))
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
+
+ def test_comments_byte(self):
+ c = TextIO()
+ c.write('# comment\n1,2,3,5\n')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments=b'#')
+ a = np.array([1, 2, 3, 5], int)
+ assert_array_equal(x, a)
+
+ def test_comments_multiple(self):
+ c = TextIO()
+ c.write('# comment\n1,2,3\n@ comment2\n4,5,6 // comment3')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments=['#', '@', '//'])
+ a = np.array([[1, 2, 3], [4, 5, 6]], int)
+ assert_array_equal(x, a)
+
+ def test_comments_multi_chars(self):
+ c = TextIO()
+ c.write('/* comment\n1,2,3,5\n')
+ c.seek(0)
+ x = np.loadtxt(c, dtype=int, delimiter=',',
+ comments='/*')
a = np.array([1, 2, 3, 5], int)
assert_array_equal(x, a)
+ # Check that '/*' is not transformed to ['/', '*']
+ c = TextIO()
+ c.write('*/ comment\n1,2,3,5\n')
+ c.seek(0)
+ assert_raises(ValueError, np.loadtxt, c, dtype=int, delimiter=',',
+ comments='/*')
+
def test_skiprows(self):
c = TextIO()
c.write('comment\n1,2,3,5\n')