diff options
author | Antony Lee <anntzer.lee@gmail.com> | 2021-08-03 16:03:26 +0200 |
---|---|---|
committer | Antony Lee <anntzer.lee@gmail.com> | 2021-08-06 19:17:38 +0200 |
commit | e6be8977bd741ee39fdc0b30fccae036c68db9f7 (patch) | |
tree | 189d348fc01e65f3c19d41ad261adaadcce5380e /numpy/lib/npyio.py | |
parent | 99eac42354f9c900bf8897abec6e7c1bd6c9ff6c (diff) | |
download | numpy-e6be8977bd741ee39fdc0b30fccae036c68db9f7.tar.gz |
PERF: In loadtxt, decide once and for all whether decoding is needed.
... and use a single decoder function instead of repeatedly checking the
input type (in `_decode_line`).
~5-8% speedup.
Diffstat (limited to 'numpy/lib/npyio.py')
-rw-r--r-- | numpy/lib/npyio.py | 18 |
1 file changed, 12 insertions, 6 deletions
diff --git a/numpy/lib/npyio.py b/numpy/lib/npyio.py
index 983e2615c..159378992 100644
--- a/numpy/lib/npyio.py
+++ b/numpy/lib/npyio.py
@@ -979,9 +979,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
     # Nested functions used by loadtxt.
     # - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -
 
-    def split_line(line):
-        """Chop off comments, strip, and split at delimiter. """
-        line = _decode_line(line, encoding=encoding)
+    def split_line(line: str):
+        """Chop off comments, strip, and split at delimiter."""
         for comment in comments:  # Much faster than using a single regex.
             line = line.split(comment, 1)[0]
         line = line.strip('\r\n')
@@ -1002,8 +1001,7 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         X = []
         line_iter = itertools.chain([first_line], fh)
         line_iter = itertools.islice(line_iter, max_rows)
-        for i, line in enumerate(line_iter):
-            vals = split_line(line)
+        for i, vals in enumerate(map(split_line, map(decode, line_iter))):
             if len(vals) == 0:
                 continue
             if usecols:
@@ -1108,7 +1106,8 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
         # Read until we find a line with some values, and use it to determine
         # the need for decoding and estimate the number of columns.
         for first_line in fh:
-            ncols = len(usecols or split_line(first_line))
+            ncols = len(usecols
+                        or split_line(_decode_line(first_line, encoding)))
             if ncols:
                 break
         else:  # End of lines reached
@@ -1117,6 +1116,13 @@ def loadtxt(fname, dtype=float, comments='#', delimiter=None,
             warnings.warn('loadtxt: Empty input file: "%s"' % fname,
                           stacklevel=2)
 
+        # Decide once and for all whether decoding is needed.
+        if isinstance(first_line, bytes):
+            decode = methodcaller(
+                "decode", encoding if encoding is not None else "latin1")
+        else:
+            def decode(line): return line
+
         # Now that we know ncols, create the default converters list, and
         # set packing, if necessary.
         if len(dtype_types) > 1: