Merge pull request #4612 from Nodd/loadtxt_comments

ENH: Multiple comment tokens in loadtxt
author: Charles Harris <charlesr.harris@gmail.com> 2015-04-23 12:41:47 -0400
committer: Charles Harris <charlesr.harris@gmail.com> 2015-04-23 12:41:47 -0400
commit: ea1036cc11d74d7f2a795723f12ab28456e5ab62 (patch)
tree: 8df6dc2e3a7f23cf3f489bedeca08053bc3705c1 /numpy/lib/npyio.py
parent: e9d04e97167b6aa360a2fc40da721430c4ce101b (diff)
parent: 36dbfa5dfd62c559dfdc4aa49bb0192df8a33abd (diff)
download: numpy-ea1036cc11d74d7f2a795723f12ab28456e5ab62.tar.gz
1 file changed, 15 insertions, 7 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index bab30355c..ec89397a0 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -745,8 +745,9 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         each row will be interpreted as an element of the array.  In this
         case, the number of columns used must match the number of fields in
         the data-type.
-    comments : str, optional
-        The character used to indicate the start of a comment;
+    comments : str or sequence, optional
+        The characters or list of characters used to indicate the start of a
+        comment;
         default: '#'.
     delimiter : str, optional
         The string used to separate values.  By default, this is any
@@ -819,7 +820,14 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     """
     # Type conversions for Py3 convenience
     if comments is not None:
-        comments = asbytes(comments)
+        if isinstance(comments, (basestring, bytes)):
+            comments = [asbytes(comments)]
+        else:
+            comments = [asbytes(comment) for comment in comments]
+
+        # Compile regex for comments beforehand
+        comments = (re.escape(comment) for comment in comments)
+        regex_comments = re.compile(asbytes('|').join(comments))
     user_converters = converters
     if delimiter is not None:
         delimiter = asbytes(delimiter)
@@ -897,10 +905,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         returns bytes.
 
         """
-        if comments is None:
-            line = asbytes(line).strip(asbytes('\r\n'))
-        else:
-            line = asbytes(line).split(comments)[0].strip(asbytes('\r\n'))
+        line = asbytes(line)
+        if comments is not None:
+            line = regex_comments.split(asbytes(line), maxsplit=1)[0]
+        line = line.strip(asbytes('\r\n'))
         if line:
             return line.split(delimiter)
         else:
author	Charles Harris <charlesr.harris@gmail.com>	2015-04-23 12:41:47 -0400
committer	Charles Harris <charlesr.harris@gmail.com>	2015-04-23 12:41:47 -0400
commit	ea1036cc11d74d7f2a795723f12ab28456e5ab62 (patch)
tree	8df6dc2e3a7f23cf3f489bedeca08053bc3705c1 /numpy/lib/npyio.py
parent	e9d04e97167b6aa360a2fc40da721430c4ce101b (diff)
parent	36dbfa5dfd62c559dfdc4aa49bb0192df8a33abd (diff)
download	numpy-ea1036cc11d74d7f2a795723f12ab28456e5ab62.tar.gz