Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r--  sqlparse/lexer.py | 51
 1 file changed, 14 insertions(+), 37 deletions(-)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index fd29f5c..7ce6d36 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -15,31 +15,23 @@
import re
import sys
+from sqlparse import compat
from sqlparse import tokens
+from sqlparse.compat import StringIO
from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
-from cStringIO import StringIO
class include(str):
pass
-class combined(tuple):
- """Indicates a state combined from multiple states."""
-
- def __new__(cls, *args):
- return tuple.__new__(cls, args)
-
- def __init__(self, *args):
- # tuple.__init__ doesn't do anything
- pass
-
-
def is_keyword(value):
test = value.upper()
return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
+# TODO(andi): Can this be removed? If so, add_filter and Lexer.filters
+# should be removed too.
def apply_filters(stream, filters, lexer=None):
"""
Use this method to apply an iterable of filters to
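Note: sqlparse.compat is new here and is not part of this diff. A minimal sketch of what it presumably provides, judging from the names lexer.py now uses (PY2, text_type, string_types, StringIO); the real module may differ:

    # Sketch of sqlparse/compat.py -- assumed, not shown in this commit.
    import sys

    PY2 = sys.version_info[0] == 2

    if PY2:
        text_type = unicode            # noqa: F821 (Python 2 builtin)
        string_types = (str, unicode)  # noqa: F821
        from StringIO import StringIO
    else:
        text_type = str
        string_types = (str,)
        from io import StringIO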
@@ -81,14 +73,14 @@ class LexerMeta(type):
try:
rex = re.compile(tdef[0], rflags).match
- except Exception, err:
+ except Exception as err:
raise ValueError(("uncompilable regex %r in state"
" %r of %r: %s"
% (tdef[0], state, cls, err)))
assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
- ('token type must be simple type or callable, not %r'
- % (tdef[1],))
+ ('token type must be simple type or callable, not %r'
+ % (tdef[1],))
if len(tdef) == 2:
new_state = None
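The "except Exception, err" form is Python 2-only syntax and a SyntaxError on Python 3; the "as err" form parses on 2.6+ and 3.x alike, which is why it is the standard spelling in dual-version code. For illustration:

    import re

    try:
        re.compile('(unbalanced')   # raises re.error, a subclass of Exception
    except Exception as err:        # 'except Exception, err:' would not parse on Python 3
        print('uncompilable regex: %s' % err)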
@@ -106,24 +98,12 @@ class LexerMeta(type):
new_state = -int(tdef2[5:])
else:
assert False, 'unknown new state %r' % tdef2
- elif isinstance(tdef2, combined):
- # combine a new state from existing ones
- new_state = '_tmp_%d' % cls._tmpname
- cls._tmpname += 1
- itokens = []
- for istate in tdef2:
- assert istate != state, \
- 'circular state ref %r' % istate
- itokens.extend(cls._process_state(unprocessed,
- processed, istate))
- processed[new_state] = itokens
- new_state = (new_state,)
elif isinstance(tdef2, tuple):
# push more than one state
for state in tdef2:
assert (state in unprocessed or
state in ('#pop', '#push')), \
- 'unknown new state ' + state
+ 'unknown new state ' + state
new_state = tdef2
else:
assert False, 'unknown new state def %r' % tdef2
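With combined states gone, a transition resolved by _process_state is one of: a plain state name, a negative int (from '#pop'/'#pop:n'), the literal '#push', or a tuple of states to push. A sketch of how a Pygments-style tokenizer loop applies these to its run-time state stack (the helper name is illustrative, not from this module):

    def apply_new_state(statestack, new_state):
        """Apply one resolved transition to the lexer state stack."""
        if isinstance(new_state, int):
            # negative count resolved from '#pop' / '#pop:n'
            del statestack[new_state:]
        elif new_state == '#push':
            statestack.append(statestack[-1])
        elif isinstance(new_state, tuple):
            # push more than one state, e.g. ('string', 'escape')
            for st in new_state:
                if st == '#pop':
                    statestack.pop()
                elif st == '#push':
                    statestack.append(statestack[-1])
                else:
                    statestack.append(st)
        else:
            statestack.append(new_state)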
@@ -134,7 +114,6 @@ class LexerMeta(type):
cls._all_tokens = {}
cls._tmpname = 0
processed = cls._all_tokens[cls.__name__] = {}
- #tokendefs = tokendefs or cls.tokens[name]
for state in cls.tokens.keys():
cls._process_state(cls.tokens, processed, state)
return processed
@@ -152,9 +131,7 @@ class LexerMeta(type):
return type.__call__(cls, *args, **kwds)
-class Lexer(object):
-
- __metaclass__ = LexerMeta
+class Lexer(compat.with_metaclass(LexerMeta)):
encoding = 'utf-8'
stripall = False
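"__metaclass__ = LexerMeta" is only honored by Python 2, and the Python 3 spelling "class Lexer(metaclass=LexerMeta)" is a SyntaxError on Python 2. compat.with_metaclass avoids both by constructing the base class through the metaclass at call time; a minimal sketch of the usual helper (the real one in sqlparse.compat may be more elaborate):

    def with_metaclass(meta, base=object):
        # Instantiating `meta` here makes it the metaclass of the
        # returned base class -- and of every subclass -- on 2 and 3.
        return meta('_NewBase', (base,), {})

    class Verbose(type):
        def __call__(cls, *args, **kwds):
            print('constructing %s' % cls.__name__)
            return type.__call__(cls, *args, **kwds)

    class Widget(with_metaclass(Verbose)):
        pass

    Widget()   # prints: constructing Widget

Verbose/Widget are illustrative names; the __call__ override mirrors how LexerMeta hooks instance creation in the hunk above.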
@@ -235,8 +212,8 @@ class Lexer(object):
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
- if text.startswith(u'\ufeff'):
- text = text[len(u'\ufeff'):]
+ if text.startswith(compat.text_type('\ufeff')):
+ text = text[len(compat.text_type('\ufeff')):]
except UnicodeDecodeError:
text = text.decode('latin1')
else:
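The 'guess' branch tries UTF-8 first, strips a byte order mark if one survives decoding, and falls back to Latin-1, which can decode any byte string. The same pattern in isolation (guess_decode is a hypothetical name, not part of this module):

    def guess_decode(data):
        try:
            text = data.decode('utf-8')
            if text.startswith(u'\ufeff'):   # drop a decoded UTF-8 BOM
                text = text[len(u'\ufeff'):]
        except UnicodeDecodeError:
            text = data.decode('latin1')     # latin1 maps all 256 byte values
        return text

    guess_decode(b'\xef\xbb\xbfSELECT 1')    # -> u'SELECT 1'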
@@ -258,13 +235,13 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
"""
- if isinstance(text, basestring):
+ if isinstance(text, compat.string_types):
if self.stripall:
text = text.strip()
elif self.stripnl:
text = text.strip('\n')
- if sys.version_info[0] < 3 and isinstance(text, unicode):
+ if compat.PY2 and isinstance(text, compat.text_type):
text = StringIO(text.encode('utf-8'))
self.encoding = 'utf-8'
else:
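Condensing the branch above: string input is stripped as configured, and on Python 2 any unicode text is re-encoded to UTF-8 bytes before being wrapped, so the rest of the lexer reads a uniform byte stream. A sketch reusing the compat names from the earlier note (normalize is a hypothetical helper, not in this module, and assumes string input):

    def normalize(text, stripall=False, stripnl=False):
        if isinstance(text, compat.string_types):
            if stripall:
                text = text.strip()
            elif stripnl:
                text = text.strip('\n')
        if compat.PY2 and isinstance(text, compat.text_type):
            return StringIO(text.encode('utf-8'))   # byte-backed stream on PY2
        return StringIO(text)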
@@ -342,7 +319,7 @@ class Lexer(object):
pos += 1
statestack = ['root']
statetokens = tokendefs['root']
- yield pos, tokens.Text, u'\n'
+ yield pos, tokens.Text, compat.text_type('\n')
continue
yield pos, tokens.Error, text[pos]
pos += 1
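End to end, the tokenizer yields (position, tokentype, value) triples, with compat.text_type guaranteeing the values are text on both Python lines. Through sqlparse's public API the same stream looks like this (the token values shown are indicative and may vary by version):

    import sqlparse

    stmt = sqlparse.parse('select foo from bar')[0]
    for tok in stmt.flatten():
        print(tok.ttype, repr(tok.value))
    # Token.Keyword.DML 'select'
    # Token.Text.Whitespace ' '
    # Token.Name 'foo'
    # ...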