summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Tao Wang <twang2218@gmail.com> 2017-01-11 20:38:30 +1100
committer Tao Wang <twang2218@gmail.com> 2017-01-12 14:20:32 +1100
commit f6e9141d3131954c7576faf059ceca8a652f3dad (patch)
tree 8249bcfc5763a18b208afb2c4236a2188efaf286
parent c92e281c19f43bbcb945b5a8c43211263ee25386 (diff)
download sqlparse-f6e9141d3131954c7576faf059ceca8a652f3dad.tar.gz
CLI: Add --encoding option
* Add `--encoding` option with default utf-8
* Make sure input and output are in same encoding
* Add test cases

Signed-off-by: Tao Wang <twang2218@gmail.com>
-rwxr-xr-x sqlparse/cli.py 21
-rw-r--r-- tests/files/encoding_gbk.sql 3
-rw-r--r-- tests/files/encoding_utf8.sql 3
-rw-r--r-- tests/test_cli.py 60
4 files changed, 81 insertions, 6 deletions
diff --git a/sqlparse/cli.py b/sqlparse/cli.py
index bd2e1b8..0b5c204 100755
--- a/sqlparse/cli.py
+++ b/sqlparse/cli.py
@@ -21,6 +21,8 @@ Why does this file exist, and why not put this in __main__?
import argparse
import sys
+from io import TextIOWrapper
+from codecs import open, getreader
import sqlparse
from sqlparse.compat import PY2
@@ -125,6 +127,12 @@ def create_parser():
type=bool,
help='Insert linebreak before comma (default False)')
+ group.add_argument(
+ '--encoding',
+ dest='encoding',
+ default='utf-8',
+ help='Specify the input encoding (default utf-8)')
+
return parser
@@ -139,18 +147,21 @@ def main(args=None):
args = parser.parse_args(args)
if args.filename == '-': # read from stdin
- data = sys.stdin.read()
+ if PY2:
+ data = getreader(args.encoding)(sys.stdin).read()
+ else:
+ data = TextIOWrapper(
+ sys.stdin.buffer, encoding=args.encoding).read()
else:
try:
- # TODO: Needs to deal with encoding
- data = ''.join(open(args.filename).readlines())
+ data = ''.join(open(args.filename, 'r', args.encoding).readlines())
except IOError as e:
return _error(
u'Failed to read {0}: {1}'.format(args.filename, e))
if args.outfile:
try:
- stream = open(args.outfile, 'w')
+ stream = open(args.outfile, 'w', args.encoding)
except IOError as e:
return _error(u'Failed to open {0}: {1}'.format(args.outfile, e))
else:
@@ -163,8 +174,6 @@ def main(args=None):
return _error(u'Invalid options: {0}'.format(e))
s = sqlparse.format(data, **formatter_opts)
- if PY2:
- s = s.encode('utf-8', 'replace')
stream.write(s)
stream.flush()
return 0
diff --git a/tests/files/encoding_gbk.sql b/tests/files/encoding_gbk.sql
new file mode 100644
index 0000000..a613229
--- /dev/null
+++ b/tests/files/encoding_gbk.sql
@@ -0,0 +1,3 @@
+select *
+from foo
+where bar = '不以物喜,不以己悲'
\ No newline at end of file
diff --git a/tests/files/encoding_utf8.sql b/tests/files/encoding_utf8.sql
new file mode 100644
index 0000000..26e7ad4
--- /dev/null
+++ b/tests/files/encoding_utf8.sql
@@ -0,0 +1,3 @@
+select *
+from foo
+where bar = '齐天大圣.カラフルな雲.사랑해요'
\ No newline at end of file
diff --git a/tests/test_cli.py b/tests/test_cli.py
index 77a764e..fd73f60 100644
--- a/tests/test_cli.py
+++ b/tests/test_cli.py
@@ -73,3 +73,63 @@ def test_script():
# Call with the --help option as a basic sanity check.
cmd = "{0:s} -m sqlparse.cli --help".format(sys.executable)
assert subprocess.call(cmd.split()) == 0
+
+
+def test_encoding_utf8_stdout(filepath, load_file, capfd):
+ path = filepath('encoding_utf8.sql')
+ expected = load_file('encoding_utf8.sql', 'utf-8')
+ sys.stdout.encoding = 'utf-8'
+ sqlparse.cli.main([path])
+ out, _ = capfd.readouterr()
+ assert out == expected
+
+
+def test_encoding_utf8_output_file(filepath, load_file, tmpdir):
+ in_path = filepath('encoding_utf8.sql')
+ expected = load_file('encoding_utf8.sql', 'utf-8')
+ out_path = tmpdir.dirname + '/encoding_utf8.out.sql'
+ sqlparse.cli.main([in_path, '-o', out_path])
+ out = load_file(out_path, 'utf-8')
+ assert out == expected
+
+
+def test_encoding_gbk_stdout(filepath, load_file, capfd):
+ path = filepath('encoding_gbk.sql')
+ expected = load_file('encoding_gbk.sql', 'gbk')
+ sys.stdout.encoding = 'gbk'
+ sqlparse.cli.main([path, '--encoding', 'gbk'])
+ out, _ = capfd.readouterr()
+ assert out == expected
+
+
+def test_encoding_gbk_output_file(filepath, load_file, tmpdir):
+ in_path = filepath('encoding_gbk.sql')
+ expected = load_file('encoding_gbk.sql', 'gbk')
+ out_path = tmpdir.dirname + '/encoding_gbk.out.sql'
+ sqlparse.cli.main([in_path, '--encoding', 'gbk', '-o', out_path])
+ out = load_file(out_path, 'gbk')
+ assert out == expected
+
+
+def test_encoding_stdin_utf8(filepath, load_file, capfd):
+ path = filepath('encoding_utf8.sql')
+ expected = load_file('encoding_utf8.sql', 'utf-8')
+ old_stdin = sys.stdin
+ sys.stdin = open(path, 'r')
+ sys.stdout.encoding = 'utf-8'
+ sqlparse.cli.main(['-'])
+ sys.stdin = old_stdin
+ out, _ = capfd.readouterr()
+ assert out == expected
+
+
+def test_encoding_stdin_gbk(filepath, load_file, capfd):
+ path = filepath('encoding_gbk.sql')
+ expected = load_file('encoding_gbk.sql', 'gbk')
+ old_stdin = sys.stdin
+ sys.stdin = open(path, 'r')
+ sys.stdout.encoding = 'gbk'
+ sqlparse.cli.main(['-', '--encoding', 'gbk'])
+ sys.stdin = old_stdin
+ out, _ = capfd.readouterr()
+ assert out == expected