diff options
author | Jan-Thorsten Peter <peter@cs.rwth-aachen.de> | 2022-02-18 12:41:46 +0100 |
---|---|---|
committer | Jan-Thorsten Peter <peter@cs.rwth-aachen.de> | 2022-02-18 18:17:02 +0100 |
commit | fd6f0bd4f7bee934d7f4975ce4997793c4a26657 (patch) | |
tree | 614e4fa0eaf0ed6286dd8f663d02614f75c5cd0c | |
parent | 1c74053bbbf5ce92bc1fc8faaf37e903aa90d011 (diff) | |
download | unidecode-fd6f0bd4f7bee934d7f4975ce4997793c4a26657.tar.gz |
Read input line by line
-rw-r--r-- | tests/test_utility.py | 2 |
-rw-r--r-- | unidecode/util.py | 22 |
2 files changed, 13 insertions, 11 deletions
```diff
diff --git a/tests/test_utility.py b/tests/test_utility.py
index 04d31bb..844a6ec 100644
--- a/tests/test_utility.py
+++ b/tests/test_utility.py
@@ -40,7 +40,7 @@ class TestUnidecodeUtility(unittest.TestCase):
         out, err, rc = run(['-e', 'utf8', f.name])
 
         # Text after : ... can differ between Python versions
-        self.assertRegex(err, '^Unable to decode input: ')
+        self.assertRegex(err, '^Unable to decode input line ')
         self.assertEqual(rc, 1)
 
     def test_file_specified_encoding(self):
diff --git a/unidecode/util.py b/unidecode/util.py
index 415bd7b..05f08bd 100644
--- a/unidecode/util.py
+++ b/unidecode/util.py
@@ -1,5 +1,6 @@
 # vim:ts=4 sw=4 expandtab softtabstop=4
 import argparse
+import io
 import locale
 import os
 import sys
@@ -30,20 +31,21 @@ def main():
         if args.text:
             fatal("Can't use both FILE and -c option")
         else:
-            with open(args.path, 'rb') as f:
-                stream = f.read()
+            stream = open(args.path, 'rb')
     elif args.text:
-        stream = os.fsencode(args.text)
+        text = os.fsencode(args.text)
         # add a newline to the string if it comes from the
         # command line so that the result is printed nicely
         # on the console.
-        stream += b'\n'
+        stream = io.BytesIO(text + b'\n')
     else:
-        stream = sys.stdin.buffer.read()
+        stream = sys.stdin.buffer
 
-    try:
-        stream = stream.decode(encoding)
-    except UnicodeDecodeError as e:
-        fatal('Unable to decode input: %s, start: %d, end: %d' % (e.reason, e.start, e.end))
+    for line_nr, line in enumerate(stream):
+        try:
+            line = line.decode(encoding)
+        except UnicodeDecodeError as e:
+            fatal('Unable to decode input line %s: %s, start: %d, end: %d' % (line_nr, e.reason, e.start, e.end))
 
-    sys.stdout.write(unidecode(stream))
+        sys.stdout.write(unidecode(line))
+    stream.close()
```