Diffstat (limited to 'sqlparse/lexer.py')
-rw-r--r--  sqlparse/lexer.py | 51
 1 file changed, 14 insertions(+), 37 deletions(-)
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index fd29f5c..7ce6d36 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -15,31 +15,23 @@
import re
import sys
+from sqlparse import compat
from sqlparse import tokens
+from sqlparse.compat import StringIO
from sqlparse.keywords import KEYWORDS, KEYWORDS_COMMON
-from cStringIO import StringIO
class include(str):
pass
-class combined(tuple):
- """Indicates a state combined from multiple states."""
-
- def __new__(cls, *args):
- return tuple.__new__(cls, args)
-
- def __init__(self, *args):
- # tuple.__init__ doesn't do anything
- pass
-
-
def is_keyword(value):
test = value.upper()
return KEYWORDS_COMMON.get(test, KEYWORDS.get(test, tokens.Name)), value
+# TODO(andi): Can this be removed? If so, add_filter and Lexer.filters
+# should be removed too.
def apply_filters(stream, filters, lexer=None):
"""
Use this method to apply an iterable of filters to
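Note: sqlparse.compat is new here and is not part of this diff. A minimal sketch of what it presumably provides, judging from the names lexer.py now uses (PY2, text_type, string_types, StringIO); the real module may differ:

    # Sketch of sqlparse/compat.py -- assumed, not shown in this commit.
    import sys

    PY2 = sys.version_info[0] == 2

    if PY2:
        text_type = unicode            # noqa: F821 (Python 2 builtin)
        string_types = (str, unicode)  # noqa: F821
        from StringIO import StringIO
    else:
        text_type = str
        string_types = (str,)
        from io import StringIO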
@@ -81,14 +73,14 @@ class LexerMeta(type):
try:
rex = re.compile(tdef[0], rflags).match
- except Exception, err:
+ except Exception as err:
raise ValueError(("uncompilable regex %r in state"
" %r of %r: %s"
% (tdef[0], state, cls, err)))
assert type(tdef[1]) is tokens._TokenType or callable(tdef[1]), \
- ('token type must be simple type or callable, not %r'
- % (tdef[1],))
+ ('token type must be simple type or callable, not %r'
+ % (tdef[1],))
if len(tdef) == 2:
new_state = None
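The "except Exception, err" form is Python 2-only syntax and a SyntaxError on Python 3; the "as err" form parses on 2.6+ and 3.x alike, which is why it is the standard spelling in dual-version code. For illustration:

    import re

    try:
        re.compile('(unbalanced')   # raises re.error, a subclass of Exception
    except Exception as err:        # 'except Exception, err:' would not parse on Python 3
        print('uncompilable regex: %s' % err)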
@@ -106,24 +98,12 @@ class LexerMeta(type):
new_state = -int(tdef2[5:])
else:
assert False, 'unknown new state %r' % tdef2
- elif isinstance(tdef2, combined):
- # combine a new state from existing ones
- new_state = '_tmp_%d' % cls._tmpname
- cls._tmpname += 1
- itokens = []
- for istate in tdef2:
- assert istate != state, \
- 'circular state ref %r' % istate
- itokens.extend(cls._process_state(unprocessed,
- processed, istate))
- processed[new_state] = itokens
- new_state = (new_state,)
elif isinstance(tdef2, tuple):
# push more than one state
for state in tdef2:
assert (state in unprocessed or
state in ('#pop', '#push')), \
- 'unknown new state ' + state
+ 'unknown new state ' + state
new_state = tdef2
else:
assert False, 'unknown new state def %r' % tdef2
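With combined states gone, a transition resolved by _process_state is one of: a plain state name, a negative int (from '#pop'/'#pop:n'), the literal '#push', or a tuple of states to push. A sketch of how a Pygments-style tokenizer loop applies these to its run-time state stack (the helper name is illustrative, not from this module):

    def apply_new_state(statestack, new_state):
        """Apply one resolved transition to the lexer state stack."""
        if isinstance(new_state, int):
            # negative count resolved from '#pop' / '#pop:n'
            del statestack[new_state:]
        elif new_state == '#push':
            statestack.append(statestack[-1])
        elif isinstance(new_state, tuple):
            # push more than one state, e.g. ('string', 'escape')
            for st in new_state:
                if st == '#pop':
                    statestack.pop()
                elif st == '#push':
                    statestack.append(statestack[-1])
                else:
                    statestack.append(st)
        else:
            statestack.append(new_state)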
@@ -134,7 +114,6 @@ class LexerMeta(type):
cls._all_tokens = {}
cls._tmpname = 0
processed = cls._all_tokens[cls.__name__] = {}
- #tokendefs = tokendefs or cls.tokens[name]
for state in cls.tokens.keys():
cls._process_state(cls.tokens, processed, state)
return processed
@@ -152,9 +131,7 @@ class LexerMeta(type):
return type.__call__(cls, *args, **kwds)
-class Lexer(object):
-
- __metaclass__ = LexerMeta
+class Lexer(compat.with_metaclass(LexerMeta)):
encoding = 'utf-8'
stripall = False
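"__metaclass__ = LexerMeta" is only honored by Python 2, and the Python 3 spelling "class Lexer(metaclass=LexerMeta)" is a SyntaxError on Python 2. compat.with_metaclass avoids both by constructing the base class through the metaclass at call time; a minimal sketch of the usual helper (the real one in sqlparse.compat may be more elaborate):

    def with_metaclass(meta, base=object):
        # Instantiating `meta` here makes it the metaclass of the
        # returned base class -- and of every subclass -- on 2 and 3.
        return meta('_NewBase', (base,), {})

    class Verbose(type):
        def __call__(cls, *args, **kwds):
            print('constructing %s' % cls.__name__)
            return type.__call__(cls, *args, **kwds)

    class Widget(with_metaclass(Verbose)):
        pass

    Widget()   # prints: constructing Widget

Verbose/Widget are illustrative names; the __call__ override mirrors how LexerMeta hooks instance creation in the hunk above.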
@@ -235,8 +212,8 @@ class Lexer(object):
if self.encoding == 'guess':
try:
text = text.decode('utf-8')
- if text.startswith(u'\ufeff'):
- text = text[len(u'\ufeff'):]
+ if text.startswith(compat.text_type('\ufeff')):
+ text = text[len(compat.text_type('\ufeff')):]
except UnicodeDecodeError:
text = text.decode('latin1')
else:
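The 'guess' branch tries UTF-8 first, strips a byte order mark if one survives decoding, and falls back to Latin-1, which can decode any byte string. The same pattern in isolation (guess_decode is a hypothetical name, not part of this module):

    def guess_decode(data):
        try:
            text = data.decode('utf-8')
            if text.startswith(u'\ufeff'):   # drop a decoded UTF-8 BOM
                text = text[len(u'\ufeff'):]
        except UnicodeDecodeError:
            text = data.decode('latin1')     # latin1 maps all 256 byte values
        return text

    guess_decode(b'\xef\xbb\xbfSELECT 1')    # -> u'SELECT 1'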
@@ -258,13 +235,13 @@ class Lexer(object):
Also preprocess the text, i.e. expand tabs and strip it if
wanted and applies registered filters.
"""
- if isinstance(text, basestring):
+ if isinstance(text, compat.string_types):
if self.stripall:
text = text.strip()
elif self.stripnl:
text = text.strip('\n')
- if sys.version_info[0] < 3 and isinstance(text, unicode):
+ if compat.PY2 and isinstance(text, compat.text_type):
text = StringIO(text.encode('utf-8'))
self.encoding = 'utf-8'
else:
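Condensing the branch above: string input is stripped as configured, and on Python 2 any unicode text is re-encoded to UTF-8 bytes before being wrapped, so the rest of the lexer reads a uniform byte stream. A sketch reusing the compat names from the earlier note (normalize is a hypothetical helper, not in this module, and assumes string input):

    def normalize(text, stripall=False, stripnl=False):
        if isinstance(text, compat.string_types):
            if stripall:
                text = text.strip()
            elif stripnl:
                text = text.strip('\n')
        if compat.PY2 and isinstance(text, compat.text_type):
            return StringIO(text.encode('utf-8'))   # byte-backed stream on PY2
        return StringIO(text)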
@@ -342,7 +319,7 @@ class Lexer(object):
pos += 1
statestack = ['root']
statetokens = tokendefs['root']
- yield pos, tokens.Text, u'\n'
+ yield pos, tokens.Text, compat.text_type('\n')
continue
yield pos, tokens.Error, text[pos]
pos += 1
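End to end, the tokenizer yields (position, tokentype, value) triples, with compat.text_type guaranteeing the values are text on both Python lines. Through sqlparse's public API the same stream looks like this (the token values shown are indicative and may vary by version):

    import sqlparse

    stmt = sqlparse.parse('select foo from bar')[0]
    for tok in stmt.flatten():
        print(tok.ttype, repr(tok.value))
    # Token.Keyword.DML 'select'
    # Token.Text.Whitespace ' '
    # Token.Name 'foo'
    # ...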