summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: Jan-Thorsten Peter <peter@cs.rwth-aachen.de> 2022-02-18 12:41:46 +0100
committer: Jan-Thorsten Peter <peter@cs.rwth-aachen.de> 2022-02-18 18:17:02 +0100
commit: fd6f0bd4f7bee934d7f4975ce4997793c4a26657 (patch)
tree: 614e4fa0eaf0ed6286dd8f663d02614f75c5cd0c
parent: 1c74053bbbf5ce92bc1fc8faaf37e903aa90d011 (diff)
download: unidecode-fd6f0bd4f7bee934d7f4975ce4997793c4a26657.tar.gz
Read input line by line
-rw-r--r-- tests/test_utility.py 2
-rw-r--r-- unidecode/util.py 22
2 files changed, 13 insertions, 11 deletions
diff --git a/tests/test_utility.py b/tests/test_utility.py
index 04d31bb..844a6ec 100644
--- a/tests/test_utility.py
+++ b/tests/test_utility.py
@@ -40,7 +40,7 @@ class TestUnidecodeUtility(unittest.TestCase):
out, err, rc = run(['-e', 'utf8', f.name])
# Text after : ... can differ between Python versions
- self.assertRegex(err, '^Unable to decode input: ')
+ self.assertRegex(err, '^Unable to decode input line ')
self.assertEqual(rc, 1)
def test_file_specified_encoding(self):
diff --git a/unidecode/util.py b/unidecode/util.py
index 415bd7b..05f08bd 100644
--- a/unidecode/util.py
+++ b/unidecode/util.py
@@ -1,5 +1,6 @@
# vim:ts=4 sw=4 expandtab softtabstop=4
import argparse
+import io
import locale
import os
import sys
@@ -30,20 +31,21 @@ def main():
if args.text:
fatal("Can't use both FILE and -c option")
else:
- with open(args.path, 'rb') as f:
- stream = f.read()
+ stream = open(args.path, 'rb')
elif args.text:
- stream = os.fsencode(args.text)
+ text = os.fsencode(args.text)
# add a newline to the string if it comes from the
# command line so that the result is printed nicely
# on the console.
- stream += b'\n'
+ stream = io.BytesIO(text + b'\n')
else:
- stream = sys.stdin.buffer.read()
+ stream = sys.stdin.buffer
- try:
- stream = stream.decode(encoding)
- except UnicodeDecodeError as e:
- fatal('Unable to decode input: %s, start: %d, end: %d' % (e.reason, e.start, e.end))
+ for line_nr, line in enumerate(stream):
+ try:
+ line = line.decode(encoding)
+ except UnicodeDecodeError as e:
+ fatal('Unable to decode input line %s: %s, start: %d, end: %d' % (line_nr, e.reason, e.start, e.end))
- sys.stdout.write(unidecode(stream))
+ sys.stdout.write(unidecode(line))
+ stream.close()