Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r--  sqlparse/lexer.py  51
1 file changed, 14 insertions(+), 37 deletions(-)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index fd29f5c..7ce6d36 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -15,31 +15,23 @@
 import re
 import sys
 
+from sqlparse import compat
 from sqlparse import tokens
+from sqlparse.compat import StringIO
 from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
-from cStringIO import StringIO
 
 
 class include(str):
     pass
 
 
-class combined(tuple):
-    """Indicates a state combined from multiple states."""
-
-    def __new__(cls, *args):
-        return tuple.__new__(cls, args)
-
-    def __init__(self, *args):
-        # tuple.__init__ doesn't do anything
-        pass
-
-
 def is_keyword(value):
     test = value.upper()
     return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
 
 
+# TODO(andi): Can this be removed? If so, add_filter and Lexer.filters
+# should be removed too.
 def apply_filters(stream, filters, lexer=None):
     """
     Use this method to apply an iterable of filters to
@@ -81,14 +73,14 @@ class LexerMeta(type):
 
             try:
                 rex = re.compile(tdef[0], rflags).match
-            except Exception, err:
+            except Exception as err:
                 raise ValueError(("uncompilable regex %r in state"
                                   " %r of %r: %s"
                                   % (tdef[0], state, cls, err)))
             assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
-                ('token type must be simple type or callable, not %r'
-                 % (tdef[1],))
+                   ('token type must be simple type or callable, not %r'
+                    % (tdef[1],))
 
             if len(tdef) == 2:
                 new_state = None
@@ -106,24 +98,12 @@ class LexerMeta(type):
                         new_state = -int(tdef2[5:])
                     else:
                         assert False, 'unknown new state %r' % tdef2
-                elif isinstance(tdef2, combined):
-                    # combine a new state from existing ones
-                    new_state = '_tmp_%d' % cls._tmpname
-                    cls._tmpname += 1
-                    itokens = []
-                    for istate in tdef2:
-                        assert istate != state, \
-                            'circular state ref %r' % istate
-                        itokens.extend(cls._process_state(unprocessed,
-                                                          processed, istate))
-                    processed[new_state] = itokens
-                    new_state = (new_state,)
                 elif isinstance(tdef2, tuple):
                     # push more than one state
                     for state in tdef2:
                         assert (state in unprocessed
                                 or state in ('#pop', '#push')), \
-                            'unknown new state ' + state
+                               'unknown new state ' + state
                     new_state = tdef2
                 else:
                     assert False, 'unknown new state def %r' % tdef2
@@ -134,7 +114,6 @@ class LexerMeta(type):
         cls._all_tokens = {}
         cls._tmpname = 0
         processed = cls._all_tokens[cls.__name__] = {}
-        #tokendefs = tokendefs or cls.tokens[name]
         for state in cls.tokens.keys():
             cls._process_state(cls.tokens, processed, state)
         return processed
@@ -152,9 +131,7 @@ class LexerMeta(type):
         return type.__call__(cls, *args, **kwds)
 
 
-class Lexer(object):
-
-    __metaclass__ = LexerMeta
+class Lexer(compat.with_metaclass(LexerMeta)):
 
     encoding = 'utf-8'
     stripall = False
@@ -235,8 +212,8 @@ class Lexer(object):
         if self.encoding == 'guess':
             try:
                 text = text.decode('utf-8')
-                if text.startswith(u'\ufeff'):
-                    text = text[len(u'\ufeff'):]
+                if text.startswith(compat.text_type('\ufeff')):
+                    text = text[len(compat.text_type('\ufeff')):]
             except UnicodeDecodeError:
                 text = text.decode('latin1')
         else:
@@ -258,13 +235,13 @@ class Lexer(object):
         Also preprocess the text, i.e. expand tabs and strip it
         if wanted and applies registered filters.
         """
-        if isinstance(text, basestring):
+        if isinstance(text, compat.string_types):
             if self.stripall:
                 text = text.strip()
             elif self.stripnl:
                 text = text.strip('\n')
 
-        if sys.version_info[0] < 3 and isinstance(text, unicode):
+        if compat.PY2 and isinstance(text, compat.text_type):
             text = StringIO(text.encode('utf-8'))
             self.encoding = 'utf-8'
         else:
@@ -342,7 +319,7 @@ class Lexer(object):
                     pos += 1
                     statestack = ['root']
                     statetokens = tokendefs['root']
-                    yield pos, tokens.Text, u'\n'
+                    yield pos, tokens.Text, compat.text_type('\n')
                     continue
                 yield pos, tokens.Error, text[pos]
                 pos += 1