| field | value | date |
|---|---|---|
| author | Andi Albrecht <albrecht.andi@gmail.com> | 2015-10-22 07:39:52 +0200 |
| committer | Andi Albrecht <albrecht.andi@gmail.com> | 2015-10-22 07:39:52 +0200 |
| commit | 0ae9e926177877caefc9aaa2535c9f1e82a90d11 (patch) | |
| tree | 7d169f4ba71c7d595eb4f3955cf80c3c67513840 /sqlparse | |
| parent | 393bed9938826aaf7798dabb1a0187df8f39d72c (diff) | |
| parent | 2bde1b9e494bc8673e0967a7b1f59150131659cc (diff) | |
| download | sqlparse-0ae9e926177877caefc9aaa2535c9f1e82a90d11.tar.gz | |
Merge pull request #201 from rygwdn/faster-in-clause
Speed up grouping of identifiers
Diffstat (limited to 'sqlparse')
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | sqlparse/engine/grouping.py | 23 |
| -rw-r--r-- | sqlparse/sql.py | 14 |
2 files changed, 25 insertions, 12 deletions
```diff
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index a317044..132d06f 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -188,10 +188,13 @@ def group_identifier(tlist):
         t1 = tl.token_next_by_type(
             i, (T.String.Symbol, T.Name, T.Literal.Number.Integer,
                 T.Literal.Number.Float))
-        t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis))
+
+        i1 = tl.token_index(t1, start=i) if t1 else None
+        t2_end = None if i1 is None else i1 + 1
+        t2 = tl.token_next_by_instance(i, (sql.Function, sql.Parenthesis), end=t2_end)
+
         if t1 and t2:
-            i1 = tl.token_index(t1)
-            i2 = tl.token_index(t2)
+            i2 = tl.token_index(t2, start=i)
             if i1 > i2:
                 return t2
             else:
@@ -211,7 +214,7 @@ def group_identifier(tlist):
     while token:
         identifier_tokens = [token] + list(
             _consume_cycle(tlist,
-                           tlist.token_index(token) + 1))
+                           tlist.token_index(token, start=idx) + 1))
         # remove trailing whitespace
         if identifier_tokens and identifier_tokens[-1].ttype is T.Whitespace:
             identifier_tokens = identifier_tokens[:-1]
@@ -220,7 +223,7 @@ def group_identifier(tlist):
                 or identifier_tokens[0].ttype in (T.Literal.Number.Integer,
                                                   T.Literal.Number.Float))):
             group = tlist.group_tokens(sql.Identifier, identifier_tokens)
-            idx = tlist.token_index(group) + 1
+            idx = tlist.token_index(group, start=idx) + 1
         else:
             idx += 1
         token = _next_token(tlist, idx)
@@ -249,8 +252,9 @@ def group_identifier_list(tlist):
     tcomma = tlist.token_next_match(idx, T.Punctuation, ',')
     start = None
     while tcomma is not None:
-        before = tlist.token_prev(tcomma)
-        after = tlist.token_next(tcomma)
+        idx = tlist.token_index(tcomma, start=idx)
+        before = tlist.token_prev(idx)
+        after = tlist.token_next(idx)
         # Check if the tokens around tcomma belong to a list
         bpassed = apassed = False
         for func in fend1_funcs:
@@ -261,12 +265,13 @@ def group_identifier_list(tlist):
         if not bpassed or not apassed:
             # Something's wrong here, skip ahead to next ","
             start = None
-            tcomma = tlist.token_next_match(tlist.token_index(tcomma) + 1,
+            tcomma = tlist.token_next_match(idx + 1,
                                             T.Punctuation, ',')
         else:
             if start is None:
                 start = before
-            next_ = tlist.token_next(after)
+            after_idx = tlist.token_index(after, start=idx)
+            next_ = tlist.token_next(after_idx)
             if next_ is None or not next_.match(T.Punctuation, ','):
                 # Reached the end of the list
                 tokens = tlist.tokens_between(start, after)
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 5ecfbdc..7325712 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -256,7 +256,7 @@ class TokenList(Token):
                 continue
             return token
 
-    def token_next_by_instance(self, idx, clss):
+    def token_next_by_instance(self, idx, clss, end=None):
         """Returns the next token matching a class.
 
         *idx* is where to start searching in the list of child tokens.
@@ -267,7 +267,7 @@ class TokenList(Token):
         if not isinstance(clss, (list, tuple)):
             clss = (clss,)
 
-        for token in self.tokens[idx:]:
+        for token in self.tokens[idx:end]:
             if isinstance(token, clss):
                 return token
 
@@ -343,8 +343,16 @@ class TokenList(Token):
                 continue
             return self.tokens[idx]
 
-    def token_index(self, token):
+    def token_index(self, token, start=0):
         """Return list index of token."""
+        if start > 0:
+            # Performing `index` manually is much faster when starting in the middle
+            # of the list of tokens and expecting to find the token near to the starting
+            # index.
+            for i in xrange(start, len(self.tokens)):
+                if self.tokens[i] == token:
+                    return i
+            return -1
         return self.tokens.index(token)
 
     def tokens_between(self, start, end, exclude_end=False):
```
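The core of the speedup is in `token_index`: `list.index` always scans from the head of the token list, so repeatedly locating tokens near the end of a long `IN (...)` clause makes grouping quadratic. Passing a `start` hint lets each lookup begin near where the caller already is (and the new `end=` parameter of `token_next_by_instance` similarly bounds its forward scan). Below is a minimal standalone sketch of that idea, not code from the commit: `index_from` and the timing numbers are illustrative, and it uses Python 3's `range` where the 2015 codebase used `xrange`.

```python
import timeit

def index_from(items, item, start=0):
    # Scan forward from `start` instead of from the head of the list,
    # mirroring TokenList.token_index(token, start=...) in the diff above.
    for i in range(start, len(items)):
        if items[i] == item:
            return i
    return -1

tokens = list(range(100_000))
target = 99_000  # a token near the position the caller has already reached

# list.index scans all ~99k preceding elements on every call.
t_head = timeit.timeit(lambda: tokens.index(target), number=100)
# Starting the scan near the expected position touches only a handful.
t_near = timeit.timeit(lambda: index_from(tokens, target, start=98_990),
                       number=100)
print(f"from head: {t_head:.4f}s, from hint: {t_near:.4f}s")
```

Because `group_identifier_list` walks the commas left to right, each `token_index(..., start=idx)` call only inspects the tokens since the previous comma, so the whole pass moves from roughly O(n²) toward O(n) on long identifier lists.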
