author     Victor Uriarte <victor.m.uriarte@intel.com>  2016-06-12 20:12:48 -0700
committer  Victor Uriarte <victor.m.uriarte@intel.com>  2016-06-14 03:25:23 -0700
commit     a7ffa646e9c2839999217cc181544a8a4bb9a5fd (patch)
tree       087f2e53200a3ac28d2a1ce7c4b9f198696166dd /sqlparse
parent     fae3d94f7f3039d5d7b264b6b4aad69c6b03c9a4 (diff)
parent     89d4f68ba5bbe78a9dd89257cbe4a9f3cfa76433 (diff)
download   sqlparse-a7ffa646e9c2839999217cc181544a8a4bb9a5fd.tar.gz
Merge remote-tracking branch 'core/long_live_indexes' into develop
Diffstat (limited to 'sqlparse')
-rw-r--r--  sqlparse/engine/grouping.py  136
-rw-r--r--  sqlparse/sql.py              106
-rw-r--r--  sqlparse/utils.py              2
3 files changed, 175 insertions(+), 69 deletions(-)
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index 6e414b8..cae5d23 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -7,7 +7,7 @@
from sqlparse import sql
from sqlparse import tokens as T
-from sqlparse.utils import recurse, imt, find_matching
+from sqlparse.utils import recurse, imt
M_ROLE = (T.Keyword, ('null', 'role'))
M_SEMICOLON = (T.Punctuation, ';')
@@ -23,40 +23,47 @@ def _group_left_right(tlist, m, cls,
valid_right=lambda t: t is not None,
semicolon=False):
"""Groups together tokens that are joined by a middle token. ie. x < y"""
+ [_group_left_right(sgroup, m, cls, valid_left, valid_right, semicolon)
+ for sgroup in tlist.get_sublists() if not isinstance(sgroup, cls)]
- for token in list(tlist):
- if token.is_group() and not isinstance(token, cls):
- _group_left_right(token, m, cls, valid_left, valid_right,
- semicolon)
-
- if not token.match(*m):
- continue
-
- left, right = tlist.token_prev(token), tlist.token_next(token)
+ token = tlist.token_next_by(m=m)
+ while token:
+ tidx = tlist.token_index(token)
+ left, right = tlist.token_prev(tidx), tlist.token_next(tidx)
if valid_left(left) and valid_right(right):
if semicolon:
# only overwrite if a semicolon is present.
- sright = tlist.token_next_by(m=M_SEMICOLON, idx=right)
+ sright = tlist.token_next_by(m=M_SEMICOLON, idx=tidx + 1)
right = sright or right
- tokens = tlist.tokens_between(left, right)
- tlist.group_tokens(cls, tokens, extend=True)
+ # Luckily, this leaves the position of `token` intact.
+ token = tlist.group_tokens_between(cls, left, right, extend=True)
+ token = tlist.token_next_by(m=m, idx=tidx + 1)
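The rewritten loop drives the scan by list index instead of re-finding tokens by identity. A minimal standalone sketch of the pattern, using plain Python lists in place of sqlparse's TokenList (names and shapes here are illustrative assumptions, not the project's API):

    # Hypothetical sketch: group each (left, middle, right) triple around a
    # middle token, then resume scanning just past the new group.
    def group_left_right(tokens, middle):
        i = 0
        while i < len(tokens):
            if tokens[i] == middle and 0 < i < len(tokens) - 1:
                # Splice left, middle, right into one grouped node; the
                # group lands at the position `left` occupied.
                tokens[i - 1:i + 2] = [tokens[i - 1:i + 2]]
                i -= 1
            i += 1
        return tokens

    assert group_left_right(['x', '<', 'y', 'and', 'a', '<', 'b'], '<') == \
        [['x', '<', 'y'], 'and', ['a', '<', 'b']]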
def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end."""
- [_group_matching(sgroup, cls) for sgroup in tlist.get_sublists()
- if not isinstance(sgroup, cls)]
- idx = 1 if isinstance(tlist, cls) else 0
+ idx = 1 if imt(tlist, i=cls) else 0
- token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
- while token:
- end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
- if end is not None:
- tokens = tlist.tokens_between(token, end)
- token = tlist.group_tokens(cls, tokens)
- _group_matching(token, cls)
- token = tlist.token_next_by(m=cls.M_OPEN, idx=token)
+ opens = []
+
+ while True:
+ try:
+ token = tlist.tokens[idx]
+ except IndexError:
+ break
+
+ if token.match(*cls.M_OPEN):
+ opens.append(idx)
+ elif token.match(*cls.M_CLOSE):
+ try:
+ open_idx = opens.pop()
+ except IndexError:
+ break
+ tlist.group_tokens_between(cls, open_idx, idx)
+ idx = open_idx
+
+ idx += 1
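This hunk drops the recursive find_matching call in favor of a single pass that stacks the index of every opener and, on each closer, groups back to the most recent one. A hedged standalone sketch of that idea, with bare strings standing in for tokens and match patterns:

    # Illustrative only: pair open/close markers in one pass with a stack.
    def group_matching(tokens, open_tok='(', close_tok=')'):
        opens = []
        idx = 0
        while idx < len(tokens):
            token = tokens[idx]
            if token == open_tok:
                opens.append(idx)
            elif token == close_tok:
                if not opens:      # unbalanced close: give up, as above
                    break
                open_idx = opens.pop()
                # Replace the span with one nested group, rescan from it.
                tokens[open_idx:idx + 1] = [tokens[open_idx:idx + 1]]
                idx = open_idx
            idx += 1
        return tokens

    assert group_matching(list('a(b(c)d)e')) == \
        ['a', ['(', 'b', ['(', 'c', ')'], 'd', ')'], 'e']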
def group_if(tlist):
@@ -109,8 +116,9 @@ def group_identifier(tlist):
token = tlist.token_next_by(t=T_IDENT)
while token:
- token = tlist.group_tokens(sql.Identifier, [token, ])
- token = tlist.token_next_by(t=T_IDENT, idx=token)
+ tidx = tlist.token_index(token)
+ token = tlist.group_tokens_between(sql.Identifier, tidx, tidx)
+ token = tlist.token_next_by(t=T_IDENT, idx=tidx + 1)
def group_period(tlist):
@@ -127,12 +135,11 @@ def group_period(tlist):
def group_arrays(tlist):
token = tlist.token_next_by(i=sql.SquareBrackets)
while token:
- prev = tlist.token_prev(token)
+ prev = tlist.token_prev(tlist.token_index(token))
if imt(prev, i=(sql.SquareBrackets, sql.Identifier, sql.Function),
t=(T.Name, T.String.Symbol,)):
- tokens = tlist.tokens_between(prev, token)
- token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
- token = tlist.token_next_by(i=sql.SquareBrackets, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, prev, token, extend=True)
+ token = tlist.token_next_by(i=sql.SquareBrackets, idx=tlist.token_index(token) + 1)
@recurse(sql.Identifier)
@@ -145,14 +152,13 @@ def group_operator(tlist):
token = tlist.token_next_by(t=(T.Operator, T.Wildcard))
while token:
- left, right = tlist.token_prev(token), tlist.token_next(token)
+ left, right = tlist.token_prev(tlist.token_index(token)), tlist.token_next(tlist.token_index(token))
if func(left) and func(right):
token.ttype = T.Operator
- tokens = tlist.tokens_between(left, right)
- token = tlist.group_tokens(sql.Operation, tokens)
+ token = tlist.group_tokens_between(sql.Operation, left, right)
- token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=token)
+ token = tlist.token_next_by(t=(T.Operator, T.Wildcard), idx=tlist.token_index(token) + 1)
@recurse(sql.IdentifierList)
@@ -163,15 +169,17 @@ def group_identifier_list(tlist):
(T.Keyword, T.Comment, T.Wildcard))
func = lambda t: imt(t, i=I_IDENT_LIST, m=M_ROLE, t=T_IDENT_LIST)
- token = tlist.token_next_by(m=M_COMMA)
+ tidx, token = tlist.token_idx_next_by(m=M_COMMA)
while token:
- before, after = tlist.token_prev(token), tlist.token_next(token)
+ before_idx, before = tlist.token_idx_prev(tidx)
+ after_idx, after = tlist.token_idx_next(tidx)
if func(before) and func(after):
- tokens = tlist.tokens_between(before, after)
- token = tlist.group_tokens(sql.IdentifierList, tokens, extend=True)
- token = tlist.token_next_by(m=M_COMMA, idx=token)
+ tidx = before_idx
+ token = tlist.group_tokens_between(sql.IdentifierList, tidx, after_idx, extend=True)
+
+ tidx, token = tlist.token_idx_next_by(m=M_COMMA, idx=tidx + 1)
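group_identifier_list is the first caller of the new token_idx_next_by, which hands back an (index, token) pair so the loop never has to recover a position with token_index. A rough standalone model of that convention (assumed names, not sqlparse's exact signatures):

    def token_idx_next_by(tokens, pred, idx=0):
        # Forward scan from `idx`, returning position and token together.
        for i, tok in enumerate(tokens[idx:], start=idx):
            if pred(tok):
                return i, tok
        return None, None

    tokens = ['a', ',', 'b', ',', 'c']
    is_comma = lambda t: t == ','
    positions = []
    tidx, token = token_idx_next_by(tokens, is_comma)
    while token is not None:
        positions.append(tidx)
        # tidx is authoritative; tokens.index(token) would cost O(n) and
        # misfire as soon as equal values repeat.
        tidx, token = token_idx_next_by(tokens, is_comma, tidx + 1)
    assert positions == [1, 3]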
def group_brackets(tlist):
@@ -187,29 +195,27 @@ def group_comments(tlist):
token = tlist.token_next_by(t=T.Comment)
while token:
end = tlist.token_not_matching(
- token, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
+ tlist.token_index(token) + 1, lambda tk: imt(tk, t=T.Comment) or tk.is_whitespace())
if end is not None:
- end = tlist.token_prev(end, False)
- tokens = tlist.tokens_between(token, end)
- token = tlist.group_tokens(sql.Comment, tokens)
+ end = tlist.token_prev(tlist.token_index(end), False)
+ token = tlist.group_tokens_between(sql.Comment, token, end)
- token = tlist.token_next_by(t=T.Comment, idx=token)
+ token = tlist.token_next_by(t=T.Comment, idx=tlist.token_index(token) + 1)
@recurse(sql.Where)
def group_where(tlist):
token = tlist.token_next_by(m=sql.Where.M_OPEN)
while token:
- end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=token)
+ end = tlist.token_next_by(m=sql.Where.M_CLOSE, idx=tlist.token_index(token) + 1)
if end is None:
- tokens = tlist.tokens_between(token, tlist._groupable_tokens[-1])
+ end = tlist._groupable_tokens[-1]
else:
- tokens = tlist.tokens_between(
- token, tlist.tokens[tlist.token_index(end) - 1])
+ end = tlist.tokens[tlist.token_index(end) - 1]
- token = tlist.group_tokens(sql.Where, tokens)
- token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=token)
+ token = tlist.group_tokens_between(sql.Where, token, end)
+ token = tlist.token_next_by(m=sql.Where.M_OPEN, idx=tlist.token_index(token) + 1)
@recurse()
@@ -217,13 +223,12 @@ def group_aliased(tlist):
I_ALIAS = (sql.Parenthesis, sql.Function, sql.Case, sql.Identifier,
sql.Operation)
- token = tlist.token_next_by(i=I_ALIAS, t=T.Number)
+ tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number)
while token:
- next_ = tlist.token_next(token)
+ next_index_, next_ = tlist.token_idx_next(tidx)
if imt(next_, i=sql.Identifier):
- tokens = tlist.tokens_between(token, next_)
- token = tlist.group_tokens(sql.Identifier, tokens, extend=True)
- token = tlist.token_next_by(i=I_ALIAS, t=T.Number, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, tidx, next_index_, extend=True)
+ tidx, token = tlist.token_idx_next_by(i=I_ALIAS, t=T.Number, idx=tidx + 1)
def group_typecasts(tlist):
@@ -243,33 +248,30 @@ def group_functions(tlist):
return
token = tlist.token_next_by(t=T.Name)
while token:
- next_ = tlist.token_next(token)
+ next_ = tlist.token_next(tlist.token_index(token))
if imt(next_, i=sql.Parenthesis):
- tokens = tlist.tokens_between(token, next_)
- token = tlist.group_tokens(sql.Function, tokens)
- token = tlist.token_next_by(t=T.Name, idx=token)
+ token = tlist.group_tokens_between(sql.Function, token, next_)
+ token = tlist.token_next_by(t=T.Name, idx=tlist.token_index(token) + 1)
def group_order(tlist):
"""Group together Identifier and Asc/Desc token"""
token = tlist.token_next_by(t=T.Keyword.Order)
while token:
- prev = tlist.token_prev(token)
+ prev = tlist.token_prev(tlist.token_index(token))
if imt(prev, i=sql.Identifier, t=T.Number):
- tokens = tlist.tokens_between(prev, token)
- token = tlist.group_tokens(sql.Identifier, tokens)
- token = tlist.token_next_by(t=T.Keyword.Order, idx=token)
+ token = tlist.group_tokens_between(sql.Identifier, prev, token)
+ token = tlist.token_next_by(t=T.Keyword.Order, idx=tlist.token_index(token) + 1)
@recurse()
def align_comments(tlist):
token = tlist.token_next_by(i=sql.Comment)
while token:
- before = tlist.token_prev(token)
+ before = tlist.token_prev(tlist.token_index(token))
if isinstance(before, sql.TokenList):
- tokens = tlist.tokens_between(before, token)
- token = tlist.group_tokens(sql.TokenList, tokens, extend=True)
- token = tlist.token_next_by(i=sql.Comment, idx=token)
+ token = tlist.group_tokens_between(sql.TokenList, before, token, extend=True)
+ token = tlist.token_next_by(i=sql.Comment, idx=tlist.token_index(token) + 1)
def group(stmt):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 52b3bf1..54f7d4f 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -204,6 +204,28 @@ class TokenList(Token):
def _groupable_tokens(self):
return self.tokens
+ def _token_idx_matching(self, funcs, start=0, end=None, reverse=False):
+ """next token that match functions"""
+ if start is None:
+ return None
+
+ if not isinstance(funcs, (list, tuple)):
+ funcs = (funcs,)
+
+ if reverse:
+ assert end is None
+ for idx in range(start - 2, -1, -1):
+ token = self.tokens[idx]
+ for func in funcs:
+ if func(token):
+ return idx, token
+ else:
+ for idx, token in enumerate(self.tokens[start:end], start=start):
+ for func in funcs:
+ if func(token):
+ return idx, token
+ return None, None
+
def _token_matching(self, funcs, start=0, end=None, reverse=False):
"""next token that match functions"""
if start is None:
@@ -225,6 +247,24 @@ class TokenList(Token):
if func(token):
return token
+ def token_first(self, skip_ws=True, skip_cm=False):
+ """Returns the first child token.
+
+ If *skip_ws* is ``True`` (the default), whitespace
+ tokens are ignored.
+
+ If *skip_cm* is ``True`` (default: ``False``), comments are
+ ignored too.
+ """
+ # this one is inconsistent, using Comment instead of T.Comment...
+ funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
+ (skip_cm and imt(tk, t=T.Comment, i=Comment)))
+ return self._token_matching(funcs)
+
+ def token_idx_next_by(self, i=None, m=None, t=None, idx=0, end=None):
+ funcs = lambda tk: imt(tk, i, m, t)
+ return self._token_idx_matching(funcs, idx, end)
+
def token_next_by(self, i=None, m=None, t=None, idx=0, end=None):
funcs = lambda tk: imt(tk, i, m, t)
return self._token_matching(funcs, idx, end)
@@ -237,12 +277,24 @@ class TokenList(Token):
def token_matching(self, idx, funcs):
return self._token_matching(funcs, idx)
+ def token_idx_prev(self, idx, skip_ws=True):
+ """Returns the previous token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no previous token.
+ """
+ idx += 1 # a lot of calling code currently pre-compensates for this
+ funcs = lambda tk: not (tk.is_whitespace() and skip_ws)
+ return self._token_idx_matching(funcs, idx, reverse=True)
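Combined with the range(start - 2, -1, -1) in _token_idx_matching, the +1 compensation means the reverse scan starts at idx - 1, the token immediately before the one the caller holds. A small illustrative sketch of that bookkeeping on a flat list (hypothetical names):

    def token_idx_prev(tokens, idx, skip_ws=True):
        idx += 1                   # compensate, as the comment above notes
        # start - 2 == original idx - 1: the immediate predecessor.
        for i in range(idx - 2, -1, -1):
            if skip_ws and tokens[i].isspace():
                continue
            return i, tokens[i]
        return None, None

    assert token_idx_prev(['a', ' ', 'b'], 2) == (0, 'a')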
+
def token_prev(self, idx=0, skip_ws=True, skip_cm=False):
"""Returns the previous token relative to *idx*.
If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
``None`` is returned if there's no previous token.
"""
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
(skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(funcs, idx, reverse=True)
@@ -255,10 +307,32 @@ class TokenList(Token):
If *skip_cm* is ``True`` (default: ``False``), comments are ignored.
``None`` is returned if there's no next token.
"""
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
funcs = lambda tk: not ((skip_ws and tk.is_whitespace()) or
(skip_cm and imt(tk, t=T.Comment, i=Comment)))
return self._token_matching(funcs, idx)
+ def token_idx_next(self, idx, skip_ws=True):
+ """Returns the next token relative to *idx*.
+
+ If *skip_ws* is ``True`` (the default) whitespace tokens are ignored.
+ ``None`` is returned if there's no next token.
+ """
+ if isinstance(idx, int):
+ idx += 1 # a lot of calling code currently pre-compensates for this
+ try:
+ if not skip_ws:
+ return idx, self.tokens[idx]
+ else:
+ while True:
+ token = self.tokens[idx]
+ if not token.is_whitespace():
+ return idx, token
+ idx += 1
+ except IndexError:
+ return None, None
+
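token_idx_next is the forward counterpart: step past idx, walk until a non-whitespace token, and translate the IndexError from running off the end into (None, None). A standalone sketch under the same assumptions (plain strings as tokens):

    def token_idx_next(tokens, idx, skip_ws=True):
        idx += 1                   # same pre-compensation as token_idx_prev
        try:
            while skip_ws and tokens[idx].isspace():
                idx += 1
            return idx, tokens[idx]
        except IndexError:         # ran past the end of the list
            return None, None

    assert token_idx_next(['a', ' ', 'b'], 0) == (2, 'b')
    assert token_idx_next(['a'], 0) == (None, None)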
def token_index(self, token, start=0):
"""Return list index of token."""
start = start if isinstance(start, int) else self.token_index(start)
@@ -274,6 +348,36 @@ class TokenList(Token):
end_idx = include_end + self.token_index(end)
return self.tokens[start_idx:end_idx]
+ def group_tokens_between(self, grp_cls, start, end, include_end=True,
+ extend=False):
+ """Replace tokens by an instance of *grp_cls*."""
+ if isinstance(start, int):
+ start_idx = start
+ start = self.tokens[start_idx]
+ else:
+ start_idx = self.token_index(start)
+
+ end_idx = self.token_index(end) if not isinstance(end, int) else end
+ end_idx += include_end
+
+ if extend and isinstance(start, grp_cls):
+ subtokens = self.tokens[start_idx + 1:end_idx]
+
+ grp = start
+ grp.tokens.extend(subtokens)
+ del self.tokens[start_idx + 1:end_idx]
+ grp.value = str(grp)
+ else:
+ subtokens = self.tokens[start_idx:end_idx]
+ grp = grp_cls(subtokens)
+ self.tokens[start_idx:end_idx] = [grp]
+ grp.parent = self
+
+ for token in subtokens:
+ token.parent = grp
+
+ return grp
+
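group_tokens_between accepts either a token or a bare index for each endpoint and splices the group in with a single slice assignment; with extend=True it folds the span into an existing group instead of nesting. A rough index-only model of both paths (a sketch, not the real method):

    def group_tokens_between(tokens, start_idx, end_idx, extend=False):
        if extend and isinstance(tokens[start_idx], list):
            # Fold the following span into the existing group in place.
            grp = tokens[start_idx]
            grp.extend(tokens[start_idx + 1:end_idx + 1])
            del tokens[start_idx + 1:end_idx + 1]
        else:
            # Replace the whole span with one new group node.
            grp = tokens[start_idx:end_idx + 1]
            tokens[start_idx:end_idx + 1] = [grp]
        return grp

    toks = ['a', '.', 'b', ' ', 'c']
    group_tokens_between(toks, 0, 2)
    assert toks == [['a', '.', 'b'], ' ', 'c']
    group_tokens_between(toks, 0, 2, extend=True)   # fold ' ', 'c' in
    assert toks == [['a', '.', 'b', ' ', 'c']]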
def group_tokens(self, grp_cls, tokens, skip_ws=False, extend=False):
"""Replace tokens by an instance of *grp_cls*."""
@@ -386,7 +490,7 @@ class Statement(TokenList):
Whitespaces and comments at the beginning of the statement
are ignored.
"""
- first_token = self.token_next(skip_cm=True)
+ first_token = self.token_first(skip_cm=True)
if first_token is None:
# An "empty" statement that either has not tokens at all
# or only whitespace tokens.
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 4a8646d..59301ff 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -104,7 +104,7 @@ def imt(token, i=None, m=None, t=None):
def find_matching(tlist, token, open_pattern, close_pattern):
- idx = tlist.token_index(token)
+ idx = tlist.token_index(token) if not isinstance(token, int) else token
depth = 0
for token in tlist.tokens[idx:]:
if token.match(*open_pattern):
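For reference, a hedged standalone sketch of the depth-counting search that the rest of find_matching performs once idx is resolved (plain strings stand in for tokens and match patterns, and the match's index is returned here for demonstration):

    def find_matching(tokens, idx, open_tok='(', close_tok=')'):
        depth = 0
        for i, token in enumerate(tokens[idx:], start=idx):
            if token == open_tok:
                depth += 1
            elif token == close_tok:
                depth -= 1
                if depth == 0:     # this close balances the first open
                    return i
        return None

    assert find_matching(list('f(a(b)c)x'), 1) == 7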