summaryrefslogtreecommitdiff
path: root/numpy/lib/npyio.py
diff options
context:
space:
mode:
authorSebastian Berg <sebastian@sipsolutions.net>2022-01-07 19:42:06 -0600
committerSebastian Berg <sebastian@sipsolutions.net>2022-01-14 20:07:07 -0600
commit3f2b8d38805d082459d5fc8cfd747291c5ed32d2 (patch)
treed02442368184bbb5eda6bc0e2102d9a6a574a89f /numpy/lib/npyio.py
parent37523dc7130cfac5400e7a0b511ba049f4c3713f (diff)
downloadnumpy-3f2b8d38805d082459d5fc8cfd747291c5ed32d2.tar.gz
ENH: Reject empty string as control character
`None` is forced instead in all cases (mainly applies to comments). This is not really a change in behaviour: It was always utterly broken. The one weird thing about it is that `delimiter=None` means "any whitespace", while `quote=None` and `comments=None` mean that no quote/comment character exists at all.
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r--numpy/lib/npyio.py56
1 files changed, 31 insertions, 25 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 0db5208c3..330eca642 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -811,15 +811,17 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
The filename or the file to be read.
delimiter : str, optional
Field delimiter of the fields in line of the file.
- Default is a comma, ','.
- comment : str or sequence of str, optional
+ Default is a comma, ','. If None any sequence of whitespace is
+ considered a delimiter.
+ comment : str or sequence of str or None, optional
Character that begins a comment. All text from the comment
character to the end of the line is ignored.
Multiple comments or multiple-character comment strings are supported,
but may be slower and `quote` must be None if used.
- quote : str, optional
+ Use None to disable all use of comments.
+ quote : str or None, optional
Character that is used to quote string fields. Default is '"'
- (a double quote).
+ (a double quote). Use None to disable quote support.
imaginary_unit : str, optional
Character that represents the imaginary unit `sqrt(-1)`.
Default is 'j'.
@@ -929,29 +931,33 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
_ensure_ndmin_ndarray_check_param(ndmin)
- if not isinstance(comment, str):
+ if comment is None:
+ comments = None
+ elif isinstance(comment, str):
+ if len(comment) > 1: # length of 0 is rejected later
+ comments = (comment,)
+ comment = None
+ else:
+ comments = None
+ else:
# assume comments are a sequence of strings
comments = tuple(comment)
- comment = ''
- # If there is only one comment, and that comment has one character,
- # the normal parsing can deal with it just fine.
- if len(comments) == 1:
+ comment = None
+ if len(comments) == 0:
+ comments = None # No comments at all
+ elif len(comments) == 1:
+ # If there is only one comment, and that comment has one character,
+ # the normal parsing can deal with it just fine.
if isinstance(comments[0], str) and len(comments[0]) == 1:
comment = comments[0]
comments = None
- elif len(comment) > 1:
- comments = (comment,)
- comment = ''
- else:
- comments = None
# comment is now None or a 0/1 character string; comments is None or a tuple:
if comments is not None:
- assert comment == ''
# Note: An earlier version supported two-character comments (and could
# have been extended to multiple characters); we assume this is
# rare enough to not optimize for.
- if quote != "":
+ if quote is not None:
raise ValueError(
"when multiple comments or a multi-character comment is "
"given, quotes are not supported. In this case the quote "
@@ -1073,7 +1079,7 @@ def _read(fname, *, delimiter=',', comment='#', quote='"',
@set_module('numpy')
def loadtxt(fname, dtype=float, comments='#', delimiter=None,
converters=None, skiprows=0, usecols=None, unpack=False,
- ndmin=0, encoding='bytes', max_rows=None, *, like=None):
+ ndmin=0, encoding='bytes', max_rows=None, *, quote=None, like=None):
r"""
Load data from a text file.
@@ -1092,7 +1098,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
each row will be interpreted as an element of the array. In this
case, the number of columns used must match the number of fields in
the data-type.
- comments : str or sequence of str, optional
+ comments : str or sequence of str or None, optional
The characters or list of characters used to indicate the start of a
comment. None implies no comments. For backwards compatibility, byte
strings will be decoded as 'latin1'. The default is '#'.
@@ -1143,6 +1149,10 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
is to read all the lines.
.. versionadded:: 1.16.0
+ quote : unicode character or None, optional
+ If given (normally ``"``) quoting support is enabled. A doubled quote
+ character within a quoted field is treated as a single escaped quote
+ (supporting the Excel csv dialect).
${ARRAY_FUNCTION_LIKE}
.. versionadded:: 1.20.0
@@ -1211,9 +1221,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
max_rows=max_rows, like=like
)
- if delimiter is None:
- delimiter = ''
- elif isinstance(delimiter, bytes):
+ if isinstance(delimiter, bytes):
delimiter.decode("latin1")
if dtype is None:
@@ -1221,9 +1229,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
comment = comments
# Type conversions for Py3 convenience
- if comment is None:
- comment = ''
- else:
+ if comment is not None:
if isinstance(comment, (str, bytes)):
comment = [comment]
comment = [
@@ -1232,7 +1238,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
arr = _read(fname, dtype=dtype, comment=comment, delimiter=delimiter,
converters=converters, skiprows=skiprows, usecols=usecols,
unpack=unpack, ndmin=ndmin, encoding=encoding,
- max_rows=max_rows, quote='')
+ max_rows=max_rows, quote=quote)
return arr