summaryrefslogtreecommitdiff
path: root/sqlparse
diff options
context:
space:
mode:
authorSjoerd Job Postmus <sjoerdjob@sjec.nl>2016-06-02 08:30:27 +0200
committerVictor Uriarte <victor.m.uriarte@intel.com>2016-06-12 17:33:15 -0700
commitd4cc0644c8348da5e49c58df5e26a3e969045249 (patch)
treea23fcaa75e313369194a27e0f7ac040b20b9e23c /sqlparse
parent896774cb5298924abbcea81b9b90f1c7c10b3d6a (diff)
downloadsqlparse-d4cc0644c8348da5e49c58df5e26a3e969045249.tar.gz
Replace _group_matching with an inward-out grouping algorithm
Previously, all matching between open and close tokens was done repeatedly: first finding the matching closing token, then grouping the tokens in between, and then recursing over the newly created list. Instead, it is more efficient to look for the previous open-token on finding a closing-token, group these two together, and then continue on. squashed: Handle token indices in group_tokens_between and find_matching.
Diffstat (limited to 'sqlparse')
-rw-r--r--sqlparse/engine/grouping.py28
-rw-r--r--sqlparse/sql.py13
-rw-r--r--sqlparse/utils.py2
3 files changed, 30 insertions, 13 deletions
diff --git a/sqlparse/engine/grouping.py b/sqlparse/engine/grouping.py
index ad7da9f..e004eae 100644
--- a/sqlparse/engine/grouping.py
+++ b/sqlparse/engine/grouping.py
@@ -2,7 +2,7 @@
from sqlparse import sql
from sqlparse import tokens as T
-from sqlparse.utils import recurse, imt, find_matching
+from sqlparse.utils import recurse, imt
M_ROLE = (T.Keyword, ('null', 'role'))
M_SEMICOLON = (T.Punctuation, ';')
@@ -39,13 +39,25 @@ def _group_matching(tlist, cls):
"""Groups Tokens that have beginning and end. ie. parenthesis, brackets.."""
idx = 1 if imt(tlist, i=cls) else 0
- token = tlist.token_next_by(m=cls.M_OPEN, idx=idx)
- while token:
- end = find_matching(tlist, token, cls.M_OPEN, cls.M_CLOSE)
- if end is not None:
- token = tlist.group_tokens_between(cls, token, end)
- _group_matching(token, cls)
- token = tlist.token_next_by(m=cls.M_OPEN, idx=tlist.token_index(token) + 1)
+ opens = []
+
+ while True:
+ try:
+ token = tlist.tokens[idx]
+ except IndexError:
+ break
+
+ if token.match(*cls.M_OPEN):
+ opens.append(idx)
+ elif token.match(*cls.M_CLOSE):
+ try:
+ open_idx = opens.pop()
+ except IndexError:
+ break
+ tlist.group_tokens_between(cls, open_idx, idx)
+ idx = open_idx
+
+ idx += 1
def group_if(tlist):
diff --git a/sqlparse/sql.py b/sqlparse/sql.py
index 81cd8e9..dfe0430 100644
--- a/sqlparse/sql.py
+++ b/sqlparse/sql.py
@@ -331,9 +331,14 @@ class TokenList(Token):
def group_tokens_between(self, grp_cls, start, end, include_end=True, extend=False):
"""Replace tokens by an instance of *grp_cls*."""
- start_idx = self.token_index(start)
- end_idx = self.token_index(end) + include_end
- tokens = self.tokens[start_idx:end_idx]
+ if isinstance(start, int):
+ start_idx = start
+ start = self.tokens[start_idx]
+ else:
+ start_idx = self.token_index(start)
+
+ end_idx = self.token_index(end) if not isinstance(end, int) else end
+ end_idx += include_end
if extend and isinstance(start, grp_cls):
subtokens = self.tokens[start_idx+1:end_idx]
@@ -344,7 +349,7 @@ class TokenList(Token):
grp.value = start.__str__()
else:
subtokens = self.tokens[start_idx:end_idx]
- grp = grp_cls(tokens)
+ grp = grp_cls(subtokens)
self.tokens[start_idx:end_idx] = [grp]
grp.parent = self
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index 90acb5c..5e01f58 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -164,7 +164,7 @@ def imt(token, i=None, m=None, t=None):
def find_matching(tlist, token, M1, M2):
- idx = tlist.token_index(token)
+ idx = tlist.token_index(token) if not isinstance(token, int) else token
depth = 0
for token in tlist.tokens[idx:]:
if token.match(*M1):