Merge branch 'issue_50' into milestone_0.1.5

author: Jesús Leganés Combarro "Piranna" <piranna@gmail.com> 2012-06-03 13:25:55 +0200
committer: Jesús Leganés Combarro "Piranna" <piranna@gmail.com> 2012-06-03 13:25:55 +0200
commit: 5771365dee7f434a58f885cccfdcf6696335542b (patch)
tree: 0c501922b5bc34f0e3e1ef07bc4dd4dc83c138e7 /sqlparse
parent: 7728323fb24d825a47ff29d85d9a64667de7e47f (diff)
parent: 49b41027a3cb88b74f9d4ffb1adb34367c7763ce (diff)
download: sqlparse-5771365dee7f434a58f885cccfdcf6696335542b.tar.gz
3 files changed, 269 insertions, 86 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 1487c24..b82a317 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -185,9 +185,11 @@ def group_identifier(tlist):
 
 
 def group_identifier_list(tlist):
-    [group_identifier_list(sgroup) for sgroup in tlist.get_sublists()
-     if not isinstance(sgroup, sql.IdentifierList)]
-    idx = 0
+    # First group the `tlist` sublists
+    for sgroup in tlist.get_sublists():
+        if not isinstance(sgroup, sql.IdentifierList):
+            group_identifier_list(sgroup)
+
     # Allowed list items
     fend1_funcs = [lambda t: isinstance(t, (sql.Identifier, sql.Function,
                                             sql.Case)),
@@ -202,36 +204,64 @@ def group_identifier_list(tlist):
                    lambda t: isinstance(t, sql.Comparison),
                    lambda t: isinstance(t, sql.Comment),
                    ]
-    tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
+
+    def group_identifierlist(start, after):
+        """
+        Create and group the identifiers list
+        """
+        tokens = tlist.tokens_between(start, after)
+        return tlist.group_tokens(sql.IdentifierList, tokens)
+
+    # Search for the first identifier list
     start = None
-    while tcomma is not None:
+    tcomma = tlist.token_next_match(0, T.Punctuation, ',')
+
+    while tcomma:
         before = tlist.token_prev(tcomma)
         after = tlist.token_next(tcomma)
-        # Check if the tokens around tcomma belong to a list
+
+        # Check if the tokens around tcomma belong to an identifier list
         bpassed = apassed = False
         for func in fend1_funcs:
-            if before is not None and func(before):
+            if before and func(before):
                 bpassed = True
-            if after is not None and func(after):
+            if after and func(after):
                 apassed = True
-        if not bpassed or not apassed:
-            # Something's wrong here, skip ahead to next ","
-            start = None
-            tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1,
-                                            T.Punctuation, ',')
-        else:
-            if start is None:
+
+        # Both tokens around tcomma belong to a list
+        if bpassed and apassed:
+            # Set the start of the identifier list if not defined before
+            if start == None:
                 start = before
-            next_ = tlist.token_next(after)
-            if next_ is None or not next_.match(T.Punctuation, ','):
-                # Reached the end of the list
-                tokens = tlist.tokens_between(start, after)
-                group = tlist.group_tokens(sql.IdentifierList, tokens)
-                start = None
-                tcomma = tlist.token_next_match(tlist.token_index(group) + 1,
-                                                T.Punctuation, ',')
-            else:
-                tcomma = next_
+
+                # Check the next token
+                next_ = tlist.token_next(after)
+                while next_:
+                    if next_.value != ',':
+                        passed = False
+                        for func in fend1_funcs:
+                            if func(next_):
+                                passed = True
+                                break
+
+                        if not passed:
+                            break
+
+                    after = next_
+                    next_ = tlist.token_next(next_)
+
+            # Reached the end of the list
+            # Create and group the identifiers list
+            tcomma = group_identifierlist(start, after)
+
+        # Skip ahead to next identifier list
+        start = None
+        tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1,
+                                        T.Punctuation, ',')
+
+    # There's an open identifier list, create and group the identifiers list
+    if start:
+        group_identifierlist(start, after)
 
 
 def group_parenthesis(tlist):
diff --git a/sqlparse/filters.py b/sqlparse/filters.py
index fd1bb2b..fcd1c8d 100644
--- a/sqlparse/filters.py
+++ b/sqlparse/filters.py
@@ -262,6 +262,9 @@ class StripWhitespaceFilter:
 
 
 class ReindentFilter:
+    """
+    Filter that returns a correctly indented version of the SQL string
+    """
 
     def __init__(self, width=2, char=' ', line_width=None):
         self.width = width
@@ -273,159 +276,284 @@ class ReindentFilter:
         self._last_stmt = None
 
     def _get_offset(self, token):
+        """
+        Return the offset where the token should be indented
+        """
+        # Get last processed line (the current one) up to the next token
         all_ = list(self._curr_stmt.flatten())
         idx = all_.index(token)
         raw = ''.join(unicode(x) for x in all_[:idx + 1])
         line = raw.splitlines()[-1]
+
         # Now take current offset into account and return relative offset.
-        full_offset = len(line) - len(self.char * (self.width * self.indent))
+        full_offset = len(line) - len(self.char * self.width * self.indent)
         return full_offset - self.offset
 
     def nl(self):
+        """
+        Return an indented new line token
+        """
         # TODO: newline character should be configurable
-        ws = '\n' + (self.char * ((self.indent * self.width) + self.offset))
+        ws = '\n' + self.char * (self.indent * self.width + self.offset)
         return sql.Token(T.Whitespace, ws)
 
     def _split_kwds(self, tlist):
+        """
+        Split `tlist` by its keywords
+        """
         split_words = ('FROM', 'JOIN$', 'AND', 'OR',
                        'GROUP', 'ORDER', 'UNION', 'VALUES',
                        'SET', 'BETWEEN')
 
         def _next_token(i):
-            t = tlist.token_next_match(i, T.Keyword, split_words,
-                                       regex=True)
+            """
+            Get next keyword where to split
+            """
+            # Search for the first keyword token
+            t = tlist.token_next_match(i, T.Keyword, split_words, regex=True)
+
+            # Use the BETWEEN ... AND ... struct as an unsplittable statement
             if t and t.value.upper() == 'BETWEEN':
                 t = _next_token(tlist.token_index(t) + 1)
                 if t and t.value.upper() == 'AND':
                     t = _next_token(tlist.token_index(t) + 1)
+
+            # Return the token
             return t
 
-        idx = 0
-        token = _next_token(idx)
+        # Get first token
+        token = _next_token(0)
         while token:
-            prev = tlist.token_prev(tlist.token_index(token), False)
             offset = 1
-            if prev and prev.is_whitespace():
-                tlist.tokens.pop(tlist.token_index(prev))
-                offset += 1
-            if (prev
-                and isinstance(prev, sql.Comment)
-                and (str(prev).endswith('\n')
-                     or str(prev).endswith('\r'))):
-                nl = tlist.token_next(token)
-            else:
+            nl = None
+
+            # Check if we have any token before
+            prev = tlist.token_prev(tlist.token_index(token), False)
+            if prev:
+                # Previous token was a whitespace, increase offset
+                if prev.is_whitespace():
+                    tlist.tokens.pop(tlist.token_index(prev))
+                    offset += 1
+
+                # Previous token was a comment, add new line if necessary
+                if isinstance(prev, sql.Comment):
+                    prev = str(prev)
+                    if prev.endswith('\n') or prev.endswith('\r'):
+                        nl = tlist.token_next(token)
+
+            # New line was not added, set it now
+            if nl == None:
                 nl = self.nl()
                 tlist.insert_before(token, nl)
+
+            # Add token now
             token = _next_token(tlist.token_index(nl) + offset)
 
     def _split_statements(self, tlist):
-        idx = 0
-        token = tlist.token_next_by_type(idx, (T.Keyword.DDL, T.Keyword.DML))
+        """
+        Split tlist on statements
+        """
+        # Search for the first statement
+        token = tlist.token_next_by_type(0, (T.Keyword.DDL, T.Keyword.DML))
+
         while token:
             prev = tlist.token_prev(tlist.token_index(token), False)
-            if prev and prev.is_whitespace():
-                tlist.tokens.pop(tlist.token_index(prev))
-            # only break if it's not the first token
             if prev:
+                if prev.is_whitespace():
+                    tlist.tokens.pop(tlist.token_index(prev))
+
+                # only break if it's not the first token
                 nl = self.nl()
                 tlist.insert_before(token, nl)
+
+            # Go to the next statement
             token = tlist.token_next_by_type(tlist.token_index(token) + 1,
                                              (T.Keyword.DDL, T.Keyword.DML))
 
     def _process(self, tlist):
+        """
+        Proxy to other methods based on `tlist` class
+        """
         func_name = '_process_%s' % tlist.__class__.__name__.lower()
         func = getattr(self, func_name, self._process_default)
         func(tlist)
 
     def _process_where(self, tlist):
+        """
+        Process WHERE statement
+        """
+        # Look for the next WHERE keyword and add a new line
         token = tlist.token_next_match(0, T.Keyword, 'WHERE')
         tlist.insert_before(token, self.nl())
+
+        # Indent and process the (indented) WHERE statement as usual
         self.indent += 1
         self._process_default(tlist)
         self.indent -= 1
 
     def _process_parenthesis(self, tlist):
+        """
+        Process parenthesis
+        """
+        # Omit the 'open parenthesis' token
+        # and check whether the next one tells us we should indent
         first = tlist.token_next(0)
-        indented = False
-        if first and first.ttype in (T.Keyword.DML, T.Keyword.DDL):
+        indented = first and first.ttype in (T.Keyword.DML, T.Keyword.DDL)
+
+        # If we should indent, increase indent and add a new line
+        if indented:
             self.indent += 1
             tlist.tokens.insert(0, self.nl())
-            indented = True
-        num_offset = self._get_offset(tlist.token_next_match(0,
-                                                        T.Punctuation, '('))
+
+        # Get indentation offset
+        token = tlist.token_next_match(0, T.Punctuation, '(')
+        num_offset = self._get_offset(token)
+
+        # Increase indentation offset and process the statement as usual
         self.offset += num_offset
         self._process_default(tlist, stmts=not indented)
+        self.offset -= num_offset
+
+        # If we indented, decrease indent to previous state
         if indented:
             self.indent -= 1
-        self.offset -= num_offset
 
     def _process_identifierlist(self, tlist):
-        identifiers = list(tlist.get_identifiers())
-        if len(identifiers) > 1 and not tlist.within(sql.Function):
-            first = list(identifiers[0].flatten())[0]
-            num_offset = self._get_offset(first) - len(first.value)
-            self.offset += num_offset
-            for token in identifiers[1:]:
-                tlist.insert_before(token, self.nl())
-            for token in tlist.tokens:
-                if isinstance(token, sql.Comment):
-                    tlist.insert_after(token, self.nl())
-            self.offset -= num_offset
+        """
+        Process an identifier list
+
+        If there is more than one identifier, put each on its own line
+        """
+        # Split the identifier list if we are not in a function
+        if not tlist.within(sql.Function):
+            # Get identifiers from the tlist
+            identifiers = list(tlist.get_identifiers())
+
+            # Split the identifier list if we have more than one identifier
+            if len(identifiers) > 1:
+                # Get first token
+                first = list(identifiers[0].flatten())[0]
+
+                # Increase offset the size of the first token
+                num_offset = self._get_offset(first) - len(first.value)
+
+                # Increase offset and insert new lines
+                self.offset += num_offset
+                offset = 0
+
+                # Insert a new line between the tokens
+                ignore = False
+                for token in identifiers[1:]:
+                    if not ignore:
+                        tlist.insert_before(token, self.nl())
+                    ignore = token.ttype
+
+                    # Check identifiers offset
+                    if token.ttype:
+                        l = len(token.value)
+                        if offset < l:
+                            offset = l
+
+                # Insert another new line after comment tokens
+                for token in tlist.tokens:
+                    if isinstance(token, sql.Comment):
+                        tlist.insert_after(token, self.nl())
+
+                # Update identifiers offset
+                if offset:
+                    offset += 1
+
+                    ignore = False
+                    for token in identifiers:
+                        if not ignore and not token.ttype:
+                            tlist.insert_before(token, sql.Token(T.Whitespace,
+                                                                 " " * offset))
+                        ignore = token.ttype
+
+                # Decrease offset the size of the first token
+                self.offset -= num_offset
+
+        # Process the identifier list as usual
         self._process_default(tlist)
 
     def _process_case(self, tlist):
-        is_first = True
-        num_offset = None
+        """
+        Process a CASE statement
+        """
+        # Increase the offset the size of the CASE keyword
         case = tlist.tokens[0]
         outer_offset = self._get_offset(case) - len(case.value)
         self.offset += outer_offset
-        for cond, value in tlist.get_cases():
-            if is_first:
-                tcond = list(cond[0].flatten())[0]
-                is_first = False
-                num_offset = self._get_offset(tcond) - len(tcond.value)
-                self.offset += num_offset
-                continue
+
+        # Get the case conditions
+        cases = tlist.get_cases()
+
+        # Get and increase the offset the size of the condition selector
+        cond, value = cases[0]
+        tcond = list(cond[0].flatten())[0]
+        num_offset = self._get_offset(tcond) - len(tcond.value)
+        self.offset += num_offset
+
+        # Insert a new line before each condition
+        for cond, value in cases[1:]:
             if cond is None:
                 token = value[0]
             else:
                 token = cond[0]
+
             tlist.insert_before(token, self.nl())
+
         # Line breaks on group level are done. Now let's add an offset of
         # 5 (=length of "when", "then", "else") and process subgroups.
         self.offset += 5
         self._process_default(tlist)
         self.offset -= 5
-        if num_offset is not None:
-            self.offset -= num_offset
+
+        # Decrease the offset the size of the condition selector
+        self.offset -= num_offset
+
+        # Insert a new line before the case END keyword
         end = tlist.token_next_match(0, T.Keyword, 'END')
         tlist.insert_before(end, self.nl())
+
+        # Decrease the offset the size of the CASE keyword
         self.offset -= outer_offset
 
     def _process_default(self, tlist, stmts=True, kwds=True):
+        """
+        Generic processing of `tlist` statements
+        """
         if stmts:
             self._split_statements(tlist)
         if kwds:
             self._split_kwds(tlist)
-        [self._process(sgroup) for sgroup in tlist.get_sublists()]
+
+        for sgroup in tlist.get_sublists():
+            self._process(sgroup)
 
     def process(self, stack, stmt):
         warn("Deprecated, use callable objects. This will be removed at 0.2.0",
              DeprecationWarning)
 
+        # If we are processing a statement, set it as the current one
         if isinstance(stmt, sql.Statement):
             self._curr_stmt = stmt
+
+        # Process the statement
         self._process(stmt)
+
+        # If we are processing a statement, check if we should add a new line
         if isinstance(stmt, sql.Statement):
-            if self._last_stmt is not None:
+            if self._last_stmt:
                 if unicode(self._last_stmt).endswith('\n'):
                     nl = '\n'
                 else:
                     nl = '\n\n'
-                stmt.tokens.insert(0,
-                    sql.Token(T.Whitespace, nl))
-            if self._last_stmt != stmt:
-                self._last_stmt = stmt
+
+                stmt.tokens.insert(0, sql.Token(T.Whitespace, nl))
+
+            # Remember this statement as the last processed one
+            self._last_stmt = stmt
 
 
 # FIXME: Doesn't work ;)
@@ -491,17 +619,21 @@ class ColumnsSelect:
                     mode = 1
 
             # We have detected a SELECT statement
-            elif mode == 1:
-                if value == 'FROM':
+            elif mode in (1, 3):
+                if value in ('FROM', 'WHERE', 'GROUP'):
                     if oldValue:
                         yield oldValue
+                        oldValue = ""
 
-                    mode = 3    # Columns have been checked
+                    break    # Columns have been checked
 
                 elif value == 'AS':
                     oldValue = ""
                     mode = 2
 
+                elif token_type in Whitespace:
+                    mode = 3
+
                 elif (token_type == Punctuation
                       and value == ',' and not parenthesis):
                     if oldValue:
@@ -514,7 +646,11 @@ class ColumnsSelect:
                     elif value == ')':
                         parenthesis -= 1
 
-                    oldValue += value
+                    if mode == 3:
+                        oldValue = value
+                        mode = 1
+                    else:
+                        oldValue += value
 
             # We are processing an AS keyword
             elif mode == 2:
@@ -523,6 +659,9 @@ class ColumnsSelect:
                     yield value
                     mode = 1
 
+        if oldValue:
+            yield oldValue
+
 
 # ---------------------------
 # postprocess
diff --git a/sqlparse/formatter.py b/sqlparse/formatter.py
index 5be6652..f182850 100644
--- a/sqlparse/formatter.py
+++ b/sqlparse/formatter.py
@@ -11,34 +11,43 @@ from sqlparse import filters
 
 def validate_options(options):
     """Validates options."""
+
+    # keyword_case
     kwcase = options.get('keyword_case', None)
     if kwcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for keyword_case: %r' % kwcase)
 
+    # identifier_case
     idcase = options.get('identifier_case', None)
     if idcase not in [None, 'upper', 'lower', 'capitalize']:
         raise SQLParseError('Invalid value for identifier_case: %r' % idcase)
 
+    # output_format
     ofrmt = options.get('output_format', None)
     if ofrmt not in [None, 'sql', 'python', 'php']:
         raise SQLParseError('Unknown output format: %r' % ofrmt)
 
+    # strip_comments
     strip_comments = options.get('strip_comments', False)
     if strip_comments not in [True, False]:
         raise SQLParseError('Invalid value for strip_comments: %r'
                             % strip_comments)
 
+    # strip_whitespace
     strip_ws = options.get('strip_whitespace', False)
     if strip_ws not in [True, False]:
         raise SQLParseError('Invalid value for strip_whitespace: %r'
                             % strip_ws)
 
+    # reindent
     reindent = options.get('reindent', False)
     if reindent not in [True, False]:
         raise SQLParseError('Invalid value for reindent: %r'
                             % reindent)
     elif reindent:
         options['strip_whitespace'] = True
+
+    # indent_tabs
     indent_tabs = options.get('indent_tabs', False)
     if indent_tabs not in [True, False]:
         raise SQLParseError('Invalid value for indent_tabs: %r' % indent_tabs)
@@ -46,15 +55,20 @@ def validate_options(options):
         options['indent_char'] = '\t'
     else:
         options['indent_char'] = ' '
+
+    # indent_width
     indent_width = options.get('indent_width', 2)
     try:
         indent_width = int(indent_width)
     except (TypeError, ValueError):
         raise SQLParseError('indent_width requires an integer')
+
     if indent_width < 1:
         raise SQLParseError('indent_width requires an positive integer')
+
     options['indent_width'] = indent_width
 
+    # right_margin
     right_margin = options.get('right_margin', None)
     if right_margin is not None:
         try:
@@ -65,6 +79,7 @@ def validate_options(options):
             raise SQLParseError('right_margin requires an integer > 10')
     options['right_margin'] = right_margin
 
+    # return the processed options
     return options
 
 
@@ -89,8 +104,7 @@ def build_filter_stack(stack, options):
         stack.enable_grouping()
         stack.stmtprocess.append(filters.StripCommentsFilter())
 
-    if (options.get('strip_whitespace', False)
-        or options.get('reindent', False)):
+    if options.get('strip_whitespace', False):
         stack.enable_grouping()
         stack.stmtprocess.append(filters.StripWhitespaceFilter())
author	Jesús Leganés Combarro "Piranna" <piranna@gmail.com>	2012-06-03 13:25:55 +0200
committer	Jesús Leganés Combarro "Piranna" <piranna@gmail.com>	2012-06-03 13:25:55 +0200
commit	5771365dee7f434a58f885cccfdcf6696335542b (patch)
tree	0c501922b5bc34f0e3e1ef07bc4dd4dc83c138e7 /sqlparse
parent	7728323fb24d825a47ff29d85d9a64667de7e47f (diff)
parent	49b41027a3cb88b74f9d4ffb1adb34367c7763ce (diff)
download	sqlparse-5771365dee7f434a58f885cccfdcf6696335542b.tar.gz