diff options
Diffstat (limited to 'sqlparse')
| -rw-r--r-- | sqlparse/__main__.py | 144 | ||||
| -rw-r--r-- | sqlparse/engine/grouping.py | 33 | ||||
| -rw-r--r-- | sqlparse/filters/others.py | 4 | ||||
| -rw-r--r-- | sqlparse/sql.py | 46 | ||||
| -rw-r--r-- | sqlparse/utils.py | 4 |
5 files changed, 189 insertions, 42 deletions
diff --git a/sqlparse/__main__.py b/sqlparse/__main__.py new file mode 100644 index 0000000..28abb6c --- /dev/null +++ b/sqlparse/__main__.py @@ -0,0 +1,144 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- +# +# Copyright (C) 2016 Andi Albrecht, albrecht.andi@gmail.com +# +# This module is part of python-sqlparse and is released under +# the BSD License: http://www.opensource.org/licenses/bsd-license.php + +import argparse +import sys + +import sqlparse +from sqlparse.compat import PY2 +from sqlparse.exceptions import SQLParseError + +_CASE_CHOICES = ['upper', 'lower', 'capitalize'] + +# TODO: Add CLI Tests +# TODO: Simplify formatter by using argparse `type` arguments +parser = argparse.ArgumentParser( + prog='sqlparse', + description='Format FILE according to OPTIONS. Use "-" as FILE ' + 'to read from stdin.', + usage='%(prog)s [OPTIONS] FILE, ...', + version=sqlparse.__version__,) + +parser.add_argument('filename') + +parser.add_argument( + '-o', '--outfile', + dest='outfile', + metavar='FILE', + help='write output to FILE (defaults to stdout)') + +group = parser.add_argument_group('Formatting Options') + +group.add_argument( + '-k', '--keywords', + metavar='CHOICE', + dest='keyword_case', + choices=_CASE_CHOICES, + help='change case of keywords, CHOICE is one of {0}'.format( + ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + +group.add_argument( + '-i', '--identifiers', + metavar='CHOICE', + dest='identifier_case', + choices=_CASE_CHOICES, + help='change case of identifiers, CHOICE is one of {0}'.format( + ', '.join('"{0}"'.format(x) for x in _CASE_CHOICES))) + +group.add_argument( + '-l', '--language', + metavar='LANG', + dest='output_format', + choices=['python', 'php'], + help='output a snippet in programming language LANG, ' + 'choices are "python", "php"') + +group.add_argument( + '--strip-comments', + dest='strip_comments', + action='store_true', + default=False, + help='remove comments') + +group.add_argument( + '-r', '--reindent', + dest='reindent', + action='store_true', + default=False, + help='reindent statements') + +group.add_argument( + '--indent_width', + dest='indent_width', + default=2, + type=int, + help='indentation width (defaults to 2 spaces)') + +group.add_argument( + '-a', '--reindent_aligned', + action='store_true', + default=False, + help='reindent statements to aligned format') + +group.add_argument( + '-s', '--use_space_around_operators', + action='store_true', + default=False, + help='place spaces around mathematical operators') + +group.add_argument( + '--wrap_after', + dest='wrap_after', + default=0, + type=int, + help='Column after which lists should be wrapped') + + +def _error(msg): + """Print msg and optionally exit with return code exit_.""" + sys.stderr.write('[ERROR] %s\n' % msg) + + +def main(args=None): + args = parser.parse_args(args) + + if args.filename == '-': # read from stdin + data = sys.stdin.read() + else: + try: + data = ''.join(open(args.filename).readlines()) + except IOError as e: + _error('Failed to read %s: %s' % (args.filename, e)) + return 1 + + if args.outfile: + try: + stream = open(args.outfile, 'w') + except IOError as e: + _error('Failed to open %s: %s' % (args.outfile, e)) + return 1 + else: + stream = sys.stdout + + formatter_opts = vars(args) + try: + formatter_opts = sqlparse.formatter.validate_options(formatter_opts) + except SQLParseError as e: + _error('Invalid options: %s' % e) + return 1 + + s = sqlparse.format(data, **formatter_opts) + if PY2: + s = s.encode('utf-8', 'replace') + stream.write(s) + stream.flush() + return 0 + + +if __name__ == '__main__': + sys.exit(main()) diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 91bb3d9..6e414b8 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -23,11 +23,15 @@ def _group_left_right(tlist, m, cls, valid_right=lambda t: t is not None, semicolon=False): """Groups together tokens that are joined by a middle token. ie. x < y""" - [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon) - for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)] - token = tlist.token_next_by(m=m) - while token: + for token in list(tlist): + if token.is_group() and not isinstance(token, cls): + _group_left_right(token, m, cls, valid_left, valid_right, + semicolon) + + if not token.match(*m): + continue + left, right = tlist.token_prev(token), tlist.token_next(token) if valid_left(left) and valid_right(right): @@ -36,8 +40,7 @@ def _group_left_right(tlist, m, cls, sright = tlist.token_next_by(m=M_SEMICOLON, idx=right) right = sright or right tokens = tlist.tokens_between(left, right) - token = tlist.group_tokens(cls, tokens, extend=True) - token = tlist.token_next_by(m=m, idx=token) + tlist.group_tokens(cls, tokens, extend=True) def _group_matching(tlist, cls): @@ -85,11 +88,12 @@ def group_assignment(tlist): def group_comparison(tlist): - I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier) + I_COMPERABLE = (sql.Parenthesis, sql.Function, sql.Identifier, + sql.Operation) T_COMPERABLE = T_NUMERICAL + T_STRING + T_NAME - func = lambda tk: imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or ( - imt(tk, t=T.Keyword) and tk.value.upper() == 'NULL') + func = lambda tk: (imt(tk, t=T_COMPERABLE, i=I_COMPERABLE) or + (tk and tk.is_keyword and tk.normalized == 'NULL')) _group_left_right(tlist, (T.Operator.Comparison, None), sql.Comparison, valid_left=func, valid_right=func) @@ -134,9 +138,9 @@ def group_arrays(tlist): @recurse(sql.Identifier) def group_operator(tlist): I_CYCLE = (sql.SquareBrackets, sql.Parenthesis, sql.Function, - sql.Identifier,) # sql.Operation) + sql.Identifier, sql.Operation) # wilcards wouldn't have operations next to them - T_CYCLE = T_NUMERICAL + T_STRING + T_NAME # + T.Wildcard + T_CYCLE = T_NUMERICAL + T_STRING + T_NAME func = lambda tk: imt(tk, i=I_CYCLE, t=T_CYCLE) token = tlist.token_next_by(t=(T.Operator, T.Wildcard)) @@ -146,8 +150,7 @@ def group_operator(tlist): if func(left) and func(right): token.ttype = T.Operator tokens = tlist.tokens_between(left, right) - # token = tlist.group_tokens(sql.Operation, tokens) - token = tlist.group_tokens(sql.Identifier, tokens) + token = tlist.group_tokens(sql.Operation, tokens) token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token) @@ -155,7 +158,7 @@ def group_operator(tlist): @recurse(sql.IdentifierList) def group_identifier_list(tlist): I_IDENT_LIST = (sql.Function, sql.Case, sql.Identifier, sql.Comparison, - sql.IdentifierList) # sql.Operation + sql.IdentifierList, sql.Operation) T_IDENT_LIST = (T_NUMERICAL + T_STRING + T_NAME + (T.Keyword, T.Comment, T.Wildcard)) @@ -212,7 +215,7 @@ def group_where(tlist): @recurse() def group_aliased(tlist): I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier, - ) # sql.Operation) + sql.Operation) token = tlist.token_next_by(i=I_ALIAS, t=T.Number) while token: diff --git a/sqlparse/filters/others.py b/sqlparse/filters/others.py index 6951c74..71b1f8e 100644 --- a/sqlparse/filters/others.py +++ b/sqlparse/filters/others.py @@ -6,7 +6,6 @@ # the BSD License: http://www.opensource.org/licenses/bsd-license.php from sqlparse import sql, tokens as T -from sqlparse.compat import text_type from sqlparse.utils import split_unquoted_newlines @@ -114,6 +113,5 @@ class SpacesAroundOperatorsFilter(object): class SerializerUnicode(object): @staticmethod def process(stmt): - raw = text_type(stmt) - lines = split_unquoted_newlines(raw) + lines = split_unquoted_newlines(stmt) return '\n'.join(line.rstrip() for line in lines) diff --git a/sqlparse/sql.py b/sqlparse/sql.py index 43a89e7..cee6af5 100644 --- a/sqlparse/sql.py +++ b/sqlparse/sql.py @@ -37,6 +37,10 @@ class Token(object): def __str__(self): return self.value + # Pending tokenlist __len__ bug fix + # def __len__(self): + # return len(self.value) + def __repr__(self): cls = self._get_repr_name() value = self._get_repr_value() @@ -141,6 +145,10 @@ class TokenList(Token): def __str__(self): return ''.join(token.value for token in self.flatten()) + # weird bug + # def __len__(self): + # return len(self.tokens) + def __iter__(self): return iter(self.tokens) @@ -152,12 +160,12 @@ class TokenList(Token): def _pprint_tree(self, max_depth=None, depth=0, f=None): """Pretty-print the object tree.""" - ind = ' ' * (depth * 2) + indent = ' | ' * depth for idx, token in enumerate(self.tokens): - pre = ' +-' if token.is_group() else ' | ' cls = token._get_repr_name() value = token._get_repr_value() - print("{ind}{pre}{idx} {cls} '{value}'".format(**locals()), file=f) + print("{indent}{idx:2d} {cls} '{value}'" + .format(**locals()), file=f) if token.is_group() and (max_depth is None or depth < max_depth): token._pprint_tree(max_depth, depth + 1, f) @@ -216,20 +224,6 @@ class TokenList(Token): if func(token): return token - def token_first(self, skip_ws=True, skip_cm=False): - """Returns the first child token. - - If *ignore_whitespace* is ``True`` (the default), whitespace - tokens are ignored. - - if *ignore_comments* is ``True`` (default: ``False``), comments are - ignored too. - """ - # this on is inconsistent, using Comment instead of T.Comment... - funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or - (skip_cm and imt(tk, i=Comment))) - return self._token_matching(funcs) - def token_next_by(self, i=None, m=None, t=None, idx=0, end=None): funcs = lambda tk: imt(tk, i, m, t) return self._token_matching(funcs, idx, end) @@ -242,24 +236,26 @@ class TokenList(Token): def token_matching(self, idx, funcs): return self._token_matching(funcs, idx) - def token_prev(self, idx, skip_ws=True, skip_cm=False): + def token_prev(self, idx=0, skip_ws=True, skip_cm=False): """Returns the previous token relative to *idx*. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. ``None`` is returned if there's no previous token. """ funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or - (skip_cm and imt(tk, t=T.Comment))) + (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_matching(funcs, idx, reverse=True) - def token_next(self, idx, skip_ws=True, skip_cm=False): + def token_next(self, idx=0, skip_ws=True, skip_cm=False): """Returns the next token relative to *idx*. + If called with idx = 0. Returns the first child token. If *skip_ws* is ``True`` (the default) whitespace tokens are ignored. + If *skip_cm* is ``True`` (default: ``False``), comments are ignored. ``None`` is returned if there's no next token. """ funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or - (skip_cm and imt(tk, t=T.Comment))) + (skip_cm and imt(tk, t=T.Comment, i=Comment))) return self._token_matching(funcs, idx) def token_index(self, token, start=0): @@ -387,7 +383,7 @@ class Statement(TokenList): Whitespaces and comments at the beginning of the statement are ignored. """ - first_token = self.token_first(skip_cm=True) + first_token = self.token_next(skip_cm=True) if first_token is None: # An "empty" statement that either has not tokens at all # or only whitespace tokens. @@ -425,7 +421,7 @@ class Identifier(TokenList): def get_typecast(self): """Returns the typecast or ``None`` of this object as a string.""" marker = self.token_next_by(m=(T.Punctuation, '::')) - next_ = self.token_next(marker, False) + next_ = self.token_next(marker, skip_ws=False) return next_.value if next_ else None def get_ordering(self): @@ -588,3 +584,7 @@ class Begin(TokenList): """A BEGIN/END block.""" M_OPEN = T.Keyword, 'BEGIN' M_CLOSE = T.Keyword, 'END' + + +class Operation(TokenList): + """Grouping of operations""" diff --git a/sqlparse/utils.py b/sqlparse/utils.py index 8253e0b..4a8646d 100644 --- a/sqlparse/utils.py +++ b/sqlparse/utils.py @@ -9,6 +9,7 @@ import itertools import re from collections import deque from contextlib import contextmanager +from sqlparse.compat import text_type # This regular expression replaces the home-cooked parser that was here before. # It is much faster, but requires an extra post-processing step to get the @@ -33,11 +34,12 @@ SPLIT_REGEX = re.compile(r""" LINE_MATCH = re.compile(r'(\r\n|\r|\n)') -def split_unquoted_newlines(text): +def split_unquoted_newlines(stmt): """Split a string on all unquoted newlines. Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite character is inside of a string.""" + text = text_type(stmt) lines = SPLIT_REGEX.split(text) outputlines = [''] for line in lines: |
