diff options
| author | Jesús Leganés Combarro "Piranna" <piranna@gmail.com> | 2012-06-03 13:25:55 +0200 |
|---|---|---|
| committer | Jesús Leganés Combarro "Piranna" <piranna@gmail.com> | 2012-06-03 13:25:55 +0200 |
| commit | 5771365dee7f434a58f885cccfdcf6696335542b (patch) | |
| tree | 0c501922b5bc34f0e3e1ef07bc4dd4dc83c138e7 /sqlparse | |
| parent | 7728323fb24d825a47ff29d85d9a64667de7e47f (diff) | |
| parent | 49b41027a3cb88b74f9d4ffb1adb34367c7763ce (diff) | |
| download | sqlparse-5771365dee7f434a58f885cccfdcf6696335542b.tar.gz | |
Merge branch 'issue_50' into milestone_0.1.5
Diffstat (limited to 'sqlparse')
| -rw-r--r-- | sqlparse/engine/grouping.py | 80 | ||||
| -rw-r--r-- | sqlparse/filters.py | 257 | ||||
| -rw-r--r-- | sqlparse/formatter.py | 18 |
3 files changed, 269 insertions, 86 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py index 1487c24..b82a317 100644 --- a/sqlparse/engine/grouping.py +++ b/sqlparse/engine/grouping.py @@ -185,9 +185,11 @@ def group_identifier(tlist): def group_identifier_list(tlist): - [group_identifier_list(sgroup) for sgroup in tlist.get_sublists() - if not isinstance(sgroup, sql.IdentifierList)] - idx = 0 + # First group the `tlist` sublists + for sgroup in tlist.get_sublists(): + if not isinstance(sgroup, sql.IdentifierList): + group_identifier_list(sgroup) + # Allowed list items fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function, sql.Case)), @@ -202,36 +204,64 @@ def group_identifier_list(tlist): lambda t: isinstance(t, sql.Comparison), lambda t: isinstance(t, sql.Comment), ] - tcomma = tlist.token_next_match(idx, T.Punctuation, ',') + + def group_identifierlist(start, after): + """ + Create and group the identifiers list + """ + tokens = tlist.tokens_between(start, after) + return tlist.group_tokens(sql.IdentifierList, tokens) + + # Search for the first identifier list start = None - while tcomma is not None: + tcomma = tlist.token_next_match(0, T.Punctuation, ',') + + while tcomma: before = tlist.token_prev(tcomma) after = tlist.token_next(tcomma) - # Check if the tokens around tcomma belong to a list + + # Check if the tokens around tcomma belong to an identifier list bpassed = apassed = False for func in fend1_funcs: - if before is not None and func(before): + if before and func(before): bpassed = True - if after is not None and func(after): + if after and func(after): apassed = True - if not bpassed or not apassed: - # Something's wrong here, skip ahead to next "," - start = None - tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1, - T.Punctuation, ',') - else: - if start is None: + + # Both tokens around tcomma belong to a list + if bpassed and apassed: + # Set the start of the identifier list if not defined before + if start == None: start = before - next_ = tlist.token_next(after) - if next_ is None or not next_.match(T.Punctuation, ','): - # Reached the end of the list - tokens = tlist.tokens_between(start, after) - group = tlist.group_tokens(sql.IdentifierList, tokens) - start = None - tcomma = tlist.token_next_match(tlist.token_index(group) + 1, - T.Punctuation, ',') - else: - tcomma = next_ + + # Check the next token + next_ = tlist.token_next(after) + while next_: + if next_.value != ',': + passed = False + for func in fend1_funcs: + if func(next_): + passed = True + break + + if not passed: + break + + after = next_ + next_ = tlist.token_next(next_) + + # Reached the end of the list + # Create and group the identifiers list + tcomma = group_identifierlist(start, after) + + # Skip ahead to next identifier list + start = None + tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1, + T.Punctuation, ',') + + # There's an open identifier list, create and group the identifiers list + if start: + group_identifierlist(start, after) def group_parenthesis(tlist): diff --git a/sqlparse/filters.py b/sqlparse/filters.py index fd1bb2b..fcd1c8d 100644 --- a/sqlparse/filters.py +++ b/sqlparse/filters.py @@ -262,6 +262,9 @@ class StripWhitespaceFilter: class ReindentFilter: + """ + Filter that return a correctly indented version of the SQL string + """ def __init__(self, width=2, char=' ', line_width=None): self.width = width @@ -273,159 +276,284 @@ class ReindentFilter: self._last_stmt = None def _get_offset(self, token): + """ + Return the offset where the token should be indented + """ + # Get last processed line (the current one) up to the next token all_ = list(self._curr_stmt.flatten()) idx = all_.index(token) raw = ''.join(unicode(x) for x in all_[:idx + 1]) line = raw.splitlines()[-1] + # Now take current offset into account and return relative offset. - full_offset = len(line) - len(self.char * (self.width * self.indent)) + full_offset = len(line) - len(self.char * self.width * self.indent) return full_offset - self.offset def nl(self): + """ + Return an indented new line token + """ # TODO: newline character should be configurable - ws = '\n' + (self.char * ((self.indent * self.width) + self.offset)) + ws = '\n' + self.char * (self.indent * self.width + self.offset) return sql.Token(T.Whitespace, ws) def _split_kwds(self, tlist): + """ + Split `tlist` by its keywords + """ split_words = ('FROM', 'JOIN$', 'AND', 'OR', 'GROUP', 'ORDER', 'UNION', 'VALUES', 'SET', 'BETWEEN') def _next_token(i): - t = tlist.token_next_match(i, T.Keyword, split_words, - regex=True) + """ + Get next keyword where to split + """ + # Search for the first keyword token + t = tlist.token_next_match(i, T.Keyword, split_words, regex=True) + + # Use the BETWEEN ... AND ... struct as an unsplitable statement if t and t.value.upper() == 'BETWEEN': t = _next_token(tlist.token_index(t) + 1) if t and t.value.upper() == 'AND': t = _next_token(tlist.token_index(t) + 1) + + # Return the token return t - idx = 0 - token = _next_token(idx) + # Get first token + token = _next_token(0) while token: - prev = tlist.token_prev(tlist.token_index(token), False) offset = 1 - if prev and prev.is_whitespace(): - tlist.tokens.pop(tlist.token_index(prev)) - offset += 1 - if (prev - and isinstance(prev, sql.Comment) - and (str(prev).endswith('\n') - or str(prev).endswith('\r'))): - nl = tlist.token_next(token) - else: + nl = None + + # Check if we have any token before + prev = tlist.token_prev(tlist.token_index(token), False) + if prev: + # Previous token was a whitespace, increase offset + if prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + offset += 1 + + # Previous token was a comment, add new line if necessary + if isinstance(prev, sql.Comment): + prev = str(prev) + if prev.endswith('\n') or prev.endswith('\r'): + nl = tlist.token_next(token) + + # New line was not added, set it now + if nl == None: nl = self.nl() tlist.insert_before(token, nl) + + # Add token now token = _next_token(tlist.token_index(nl) + offset) def _split_statements(self, tlist): - idx = 0 - token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML)) + """ + Split tlist on statements + """ + # Search for the first statement + token = tlist.token_next_by_type(0, (T.Keyword.DDL, T.Keyword.DML)) + while token: prev = tlist.token_prev(tlist.token_index(token), False) - if prev and prev.is_whitespace(): - tlist.tokens.pop(tlist.token_index(prev)) - # only break if it's not the first token if prev: + if prev.is_whitespace(): + tlist.tokens.pop(tlist.token_index(prev)) + + # only break if it's not the first token nl = self.nl() tlist.insert_before(token, nl) + + # Go to the next statement token = tlist.token_next_by_type(tlist.token_index(token) + 1, (T.Keyword.DDL, T.Keyword.DML)) def _process(self, tlist): + """ + Proxy to other methods based on `tlist` class + """ func_name = '_process_%s' % tlist.__class__.__name__.lower() func = getattr(self, func_name, self._process_default) func(tlist) def _process_where(self, tlist): + """ + Process WHERE statement + """ + # Look for the next WHERE keyword and add a new line token = tlist.token_next_match(0, T.Keyword, 'WHERE') tlist.insert_before(token, self.nl()) + + # Indent and process the (indented) WHERE statement as usual self.indent += 1 self._process_default(tlist) self.indent -= 1 def _process_parenthesis(self, tlist): + """ + Process parenthesis + """ + # Omit the 'open parenthesis' token + # and check if the next one require say us we should indent first = tlist.token_next(0) - indented = False - if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL): + indented = first and first.ttype in (T.Keyword.DML, T.Keyword.DDL) + + # If we should indent, increase indent and add a new line + if indented: self.indent += 1 tlist.tokens.insert(0, self.nl()) - indented = True - num_offset = self._get_offset(tlist.token_next_match(0, - T.Punctuation, '(')) + + # Get indentation offset + token = tlist.token_next_match(0, T.Punctuation, '(') + num_offset = self._get_offset(token) + + # Increase indentation offset and process the statement as usual self.offset += num_offset self._process_default(tlist, stmts=not indented) + self.offset -= num_offset + + # If we indented, decrease indent to previous state if indented: self.indent -= 1 - self.offset -= num_offset def _process_identifierlist(self, tlist): - identifiers = list(tlist.get_identifiers()) - if len(identifiers) > 1 and not tlist.within(sql.Function): - first = list(identifiers[0].flatten())[0] - num_offset = self._get_offset(first) - len(first.value) - self.offset += num_offset - for token in identifiers[1:]: - tlist.insert_before(token, self.nl()) - for token in tlist.tokens: - if isinstance(token, sql.Comment): - tlist.insert_after(token, self.nl()) - self.offset -= num_offset + """ + Process an identifier list + + If there are more than an identifier, put each on a line + """ + # Split the identifier list if we are not in a function + if not tlist.within(sql.Function): + # Get identifiers from the tlist + identifiers = list(tlist.get_identifiers()) + + # Split the identifier list if we have more than one identifier + if len(identifiers) > 1: + # Get first token + first = list(identifiers[0].flatten())[0] + + # Increase offset the size of the first token + num_offset = self._get_offset(first) - len(first.value) + + # Increase offset and insert new lines + self.offset += num_offset + offset = 0 + + # Insert a new line between the tokens + ignore = False + for token in identifiers[1:]: + if not ignore: + tlist.insert_before(token, self.nl()) + ignore = token.ttype + + # Check identifiers offset + if token.ttype: + l = len(token.value) + if offset < l: + offset = l + + # Imsert another new line after comment tokens + for token in tlist.tokens: + if isinstance(token, sql.Comment): + tlist.insert_after(token, self.nl()) + + # Update identifiers offset + if offset: + offset += 1 + + ignore = False + for token in identifiers: + if not ignore and not token.ttype: + tlist.insert_before(token, sql.Token(T.Whitespace, + " " * offset)) + ignore = token.ttype + + # Decrease offset the size of the first token + self.offset -= num_offset + + # Process the identifier list as usual self._process_default(tlist) def _process_case(self, tlist): - is_first = True - num_offset = None + """ + Process a CASE statement + """ + # Increase the offset the size of the CASE keyword case = tlist.tokens[0] outer_offset = self._get_offset(case) - len(case.value) self.offset += outer_offset - for cond, value in tlist.get_cases(): - if is_first: - tcond = list(cond[0].flatten())[0] - is_first = False - num_offset = self._get_offset(tcond) - len(tcond.value) - self.offset += num_offset - continue + + # Get the case conditions + cases = tlist.get_cases() + + # Get and increase the offset the size of the condition selector + cond, value = cases[0] + tcond = list(cond[0].flatten())[0] + num_offset = self._get_offset(tcond) - len(tcond.value) + self.offset += num_offset + + # Insert a new line before each condition + for cond, value in cases[1:]: if cond is None: token = value[0] else: token = cond[0] + tlist.insert_before(token, self.nl()) + # Line breaks on group level are done. Now let's add an offset of # 5 (=length of "when", "then", "else") and process subgroups. self.offset += 5 self._process_default(tlist) self.offset -= 5 - if num_offset is not None: - self.offset -= num_offset + + # Decrease the offset the size of the condition selector + self.offset -= num_offset + + # Insert a new line before the case END keyword end = tlist.token_next_match(0, T.Keyword, 'END') tlist.insert_before(end, self.nl()) + + # Decrease the offset the size of the CASE keyword self.offset -= outer_offset def _process_default(self, tlist, stmts=True, kwds=True): + """ + Generic processing of `tlist` statements + """ if stmts: self._split_statements(tlist) if kwds: self._split_kwds(tlist) - [self._process(sgroup) for sgroup in tlist.get_sublists()] + + for sgroup in tlist.get_sublists(): + self._process(sgroup) def process(self, stack, stmt): warn("Deprecated, use callable objects. This will be removed at 0.2.0", DeprecationWarning) + # If we are processing a statement, set it as the current one if isinstance(stmt, sql.Statement): self._curr_stmt = stmt + + # Process the statement self._process(stmt) + + # If we are processing a statement, check if we should add a new line if isinstance(stmt, sql.Statement): - if self._last_stmt is not None: + if self._last_stmt: if unicode(self._last_stmt).endswith('\n'): nl = '\n' else: nl = '\n\n' - stmt.tokens.insert(0, - sql.Token(T.Whitespace, nl)) - if self._last_stmt != stmt: - self._last_stmt = stmt + + stmt.tokens.insert(0, sql.Token(T.Whitespace, nl)) + + # Set the statement as the current one + self._last_stmt = stmt # FIXME: Doesn't work ;) @@ -491,17 +619,21 @@ class ColumnsSelect: mode = 1 # We have detected a SELECT statement - elif mode == 1: - if value == 'FROM': + elif mode in (1, 3): + if value in ('FROM', 'WHERE', 'GROUP'): if oldValue: yield oldValue + oldValue = "" - mode = 3 # Columns have been checked + break # Columns have been checked elif value == 'AS': oldValue = "" mode = 2 + elif token_type in Whitespace: + mode = 3 + elif (token_type == Punctuation and value == ',' and not parenthesis): if oldValue: @@ -514,7 +646,11 @@ class ColumnsSelect: elif value == ')': parenthesis -= 1 - oldValue += value + if mode == 3: + oldValue = value + mode = 1 + else: + oldValue += value # We are processing an AS keyword elif mode == 2: @@ -523,6 +659,9 @@ class ColumnsSelect: yield value mode = 1 + if oldValue: + yield oldValue + # --------------------------- # postprocess diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py index 5be6652..f182850 100644 --- a/sqlparse/formatter.py +++ b/sqlparse/formatter.py @@ -11,34 +11,43 @@ from sqlparse import filters def validate_options(options): """Validates options.""" + + # keyword_case kwcase = options.get('keyword_case', None) if kwcase not in [None, 'upper', 'lower', 'capitalize']: raise SQLParseError('Invalid value for keyword_case: %r' % kwcase) + # identifier_case idcase = options.get('identifier_case', None) if idcase not in [None, 'upper', 'lower', 'capitalize']: raise SQLParseError('Invalid value for identifier_case: %r' % idcase) + # output_format ofrmt = options.get('output_format', None) if ofrmt not in [None, 'sql', 'python', 'php']: raise SQLParseError('Unknown output format: %r' % ofrmt) + # strip_comments strip_comments = options.get('strip_comments', False) if strip_comments not in [True, False]: raise SQLParseError('Invalid value for strip_comments: %r' % strip_comments) + # strip_whitespace strip_ws = options.get('strip_whitespace', False) if strip_ws not in [True, False]: raise SQLParseError('Invalid value for strip_whitespace: %r' % strip_ws) + # reindent reindent = options.get('reindent', False) if reindent not in [True, False]: raise SQLParseError('Invalid value for reindent: %r' % reindent) elif reindent: options['strip_whitespace'] = True + + # indent_tabs indent_tabs = options.get('indent_tabs', False) if indent_tabs not in [True, False]: raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs) @@ -46,15 +55,20 @@ def validate_options(options): options['indent_char'] = '\t' else: options['indent_char'] = ' ' + + # indent_width indent_width = options.get('indent_width', 2) try: indent_width = int(indent_width) except (TypeError, ValueError): raise SQLParseError('indent_width requires an integer') + if indent_width < 1: raise SQLParseError('indent_width requires an positive integer') + options['indent_width'] = indent_width + # right_margin right_margin = options.get('right_margin', None) if right_margin is not None: try: @@ -65,6 +79,7 @@ def validate_options(options): raise SQLParseError('right_margin requires an integer > 10') options['right_margin'] = right_margin + # return the processed options return options @@ -89,8 +104,7 @@ def build_filter_stack(stack, options): stack.enable_grouping() stack.stmtprocess.append(filters.StripCommentsFilter()) - if (options.get('strip_whitespace', False) - or options.get('reindent', False)): + if options.get('strip_whitespace', False): stack.enable_grouping() stack.stmtprocess.append(filters.StripWhitespaceFilter()) |
