Minor refactor statement filter and add comments

author: Victor Uriarte <victor.m.uriarte@intel.com> 2016-06-02 22:40:37 -0700
committer: Victor Uriarte <victor.m.uriarte@intel.com> 2016-06-04 15:06:04 -0700
commit: f0a6af57b7a5c116528db73643b26f934477d350 (patch)
tree: 3d45c1c64a5191e0710ad3f398ab0afb6b1a2563 /sqlparse/engine
parent: 3fed0393a80a40ea28e5fc0cea9b526630e9f42b (diff)
download: sqlparse-f0a6af57b7a5c116528db73643b26f934477d350.tar.gz
1 files changed, 27 insertions, 10 deletions
diff --git a/sqlparse/engine/filter.py b/sqlparse/engine/filter.py
index 3847e3d..c7b3bf8 100644
--- a/sqlparse/engine/filter.py
+++ b/sqlparse/engine/filter.py
@@ -10,41 +10,51 @@ from sqlparse import tokens as T
 
 
 class StatementFilter(object):
-    "Filter that split stream at individual statements"
+    """Filter that split stream at individual statements"""
 
     def __init__(self):
-        self._in_declare = False
-        self._in_dbldollar = False
-        self._is_create = False
-        self._begin_depth = 0
+        self._reset()
 
     def _reset(self):
-        "Set the filter attributes to its default values"
+        """Set the filter attributes to its default values"""
         self._in_declare = False
         self._in_dbldollar = False
         self._is_create = False
         self._begin_depth = 0
 
     def _change_splitlevel(self, ttype, value):
-        "Get the new split level (increase, decrease or remain equal)"
+        """Get the new split level (increase, decrease or remain equal)"""
         # PostgreSQL
         if ttype == T.Name.Builtin \
            and value.startswith('$') and value.endswith('$'):
+
+            # 2nd dbldollar found. $quote$ completed
+            # decrease level
             if self._in_dbldollar:
                 self._in_dbldollar = False
                 return -1
             else:
                 self._in_dbldollar = True
                 return 1
+
+        # if inside $$ everything inside is defining function character.
+        # Nothing inside can create a new statement
         elif self._in_dbldollar:
             return 0
 
         # ANSI
+        # if normal token return
+        # wouldn't parenthesis increase/decrease a level?
+        # no, inside a paranthesis can't start new statement
         if ttype not in T.Keyword:
             return 0
 
+        # Everything after here is ttype = T.Keyword
+        # Also to note, once entered an If statement you are done and basically
+        # returning
         unified = value.upper()
 
+        # can have nested declare inside of being...
         if unified == 'DECLARE' and self._is_create and self._begin_depth == 0:
             self._in_declare = True
             return 1
@@ -59,12 +69,16 @@ class StatementFilter(object):
         if unified in ('END IF', 'END FOR', 'END WHILE'):
             return -1
 
+        # Should this respect a preceeding BEGIN?
+        # In CASE ... WHEN ... END this results in a split level -1.
+        # Would having multiple CASE WHEN END and a Assigment Operator
+        # cause the statement to cut off prematurely?
         if unified == 'END':
-            # Should this respect a preceeding BEGIN?
-            # In CASE ... WHEN ... END this results in a split level -1.
             self._begin_depth = max(0, self._begin_depth - 1)
             return -1
 
+        # three keywords begin with CREATE, but only one of them is DDL
+        # DDL Create though can contain more words such as "or replace"
         if ttype is T.Keyword.DDL and unified.startswith('CREATE'):
             self._is_create = True
             return 0
@@ -77,7 +91,7 @@ class StatementFilter(object):
         return 0
 
     def process(self, stream):
-        "Process the stream"
+        """Process the stream"""
         consume_ws = False
         splitlevel = 0
         stmt = None
@@ -86,6 +100,9 @@ class StatementFilter(object):
         # Run over all stream tokens
         for ttype, value in stream:
             # Yield token if we finished a statement and there's no whitespaces
+            # It will count newline token as a non whitespace. In this context
+            # whitespace ignores newlines.
+            # why don't multi line comments also count?
             if consume_ws and ttype not in (T.Whitespace, T.Comment.Single):
                 stmt.tokens = stmt_tokens
                 yield stmt
author	Victor Uriarte <victor.m.uriarte@intel.com>	2016-06-02 22:40:37 -0700
committer	Victor Uriarte <victor.m.uriarte@intel.com>	2016-06-04 15:06:04 -0700
commit	f0a6af57b7a5c116528db73643b26f934477d350 (patch)
tree	3d45c1c64a5191e0710ad3f398ab0afb6b1a2563 /sqlparse/engine
parent	3fed0393a80a40ea28e5fc0cea9b526630e9f42b (diff)
download	sqlparse-f0a6af57b7a5c116528db73643b26f934477d350.tar.gz