From 2aa7f0c68f5ba77b347e216994fd01be63e8aeb0 Mon Sep 17 00:00:00 2001
From: Andi Albrecht
Date: Wed, 23 Oct 2013 10:27:33 +0200
Subject: Remove buffered reading again (fixes #114).

It causes problems with some sources.
---
 CHANGES           |  8 ++++++++
 sqlparse/lexer.py | 22 +---------------------
 2 files changed, 9 insertions(+), 21 deletions(-)

diff --git a/CHANGES b/CHANGES
index 160c536..5e73c98 100644
--- a/CHANGES
+++ b/CHANGES
@@ -1,3 +1,11 @@
+Development Version
+-------------------
+
+Bug Fixes
+* Removed buffered reading again, it obviously causes wrong parsing in some rare
+  cases (issue114).
+
+
 Release 0.1.9 (Sep 28, 2013)
 ----------------------------
 
diff --git a/sqlparse/lexer.py b/sqlparse/lexer.py
index 45ba4cb..a8e66f9 100644
--- a/sqlparse/lexer.py
+++ b/sqlparse/lexer.py
@@ -161,9 +161,6 @@ class Lexer(object):
     stripnl = False
     tabsize = 0
     flags = re.IGNORECASE | re.UNICODE
-    DEFAULT_BUFSIZE = 4096
-    MAX_BUFSIZE = 2 ** 31
-    bufsize = DEFAULT_BUFSIZE
 
     tokens = {
         'root': [
@@ -286,18 +283,13 @@ class Lexer(object):
         statetokens = tokendefs[statestack[-1]]
         known_names = {}
 
-        text = stream.read(self.bufsize)
-        hasmore = len(text) == self.bufsize
+        text = stream.read()
         text = self._decode(text)
 
         while 1:
             for rexmatch, action, new_state in statetokens:
                 m = rexmatch(text, pos)
                 if m:
-                    if hasmore and m.end() == len(text):
-                        # Since this is end, token may be truncated
-                        continue
-
                     # print rex.pattern
                     value = m.group()
                     if value in known_names:
@@ -330,20 +322,8 @@ class Lexer(object):
                     else:
                         assert False, "wrong state def: %r" % new_state
                     statetokens = tokendefs[statestack[-1]]
-                    # reset bufsize
-                    self.bufsize = self.DEFAULT_BUFSIZE
                     break
             else:
-                if hasmore:
-                    # we have no match, increase bufsize to parse lengthy
-                    # tokens faster (see #86).
-                    self.bufsize = min(self.bufsize * 2, self.MAX_BUFSIZE)
-                    buf = stream.read(self.bufsize)
-                    hasmore = len(buf) == self.bufsize
-                    text = text[pos:] + self._decode(buf)
-                    pos = 0
-                    continue
-
                 try:
                     if text[pos] == '\n':
                         # at EOL, reset state to "root"
--
cgit v1.2.1