summary | refs | log | tree | commit | diff
path: root/numpy/lib/npyio.py
diff options
context:
space:
mode:
author: Antony Lee <anntzer.lee@gmail.com> 2021-08-03 16:03:26 +0200
committer: Antony Lee <anntzer.lee@gmail.com> 2021-08-06 19:17:38 +0200
commit: e6be8977bd741ee39fdc0b30fccae036c68db9f7 (patch)
tree: 189d348fc01e65f3c19d41ad261adaadcce5380e /numpy/lib/npyio.py
parent: 99eac42354f9c900bf8897abec6e7c1bd6c9ff6c (diff)
download: numpy-e6be8977bd741ee39fdc0b30fccae036c68db9f7.tar.gz
PERF: In loadtxt, decide once and for all whether decoding is needed.
... and use a single decoder function instead of repeatedly checking the input type (in `_decode_line`). ~5-8% speedup.
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r-- numpy/lib/npyio.py 18
1 files changed, 12 insertions, 6 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 983e2615c..159378992 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -979,9 +979,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
# Nested functions used by loadtxt.
# - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
- def split_line(line):
- """Chop off comments, strip, and split at delimiter. """
- line = _decode_line(line, encoding=encoding)
+ def split_line(line: str):
+ """Chop off comments, strip, and split at delimiter."""
for comment in comments: # Much faster than using a single regex.
line = line.split(comment, 1)[0]
line = line.strip('\r\n')
@@ -1002,8 +1001,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
X = []
line_iter = itertools.chain([first_line], fh)
line_iter = itertools.islice(line_iter, max_rows)
- for i, line in enumerate(line_iter):
- vals = split_line(line)
+ for i, vals in enumerate(map(split_line, map(decode, line_iter))):
if len(vals) == 0:
continue
if usecols:
@@ -1108,7 +1106,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
# Read until we find a line with some values, and use it to determine
# the need for decoding and estimate the number of columns.
for first_line in fh:
- ncols = len(usecols or split_line(first_line))
+ ncols = len(usecols
+ or split_line(_decode_line(first_line, encoding)))
if ncols:
break
else: # End of lines reached
@@ -1117,6 +1116,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
warnings.warn('loadtxt: Empty input file: "%s"' % fname,
stacklevel=2)
+ # Decide once and for all whether decoding is needed.
+ if isinstance(first_line, bytes):
+ decode = methodcaller(
+ "decode", encoding if encoding is not None else "latin1")
+ else:
+ def decode(line): return line
+
# Now that we know ncols, create the default converters list, and
# set packing, if necessary.
if len(dtype_types) > 1: