Fix SerializerUnicode to split unquoted newlines

This fixes issue #131. The `split_unquoted_newlines()` function added to the utils module iterates once over the string passed in and splits it on each unquoted CR, LF, or CR+LF as it is found.
author: Michael Schuller <michael.schuller@artlogic.net> 2014-03-07 18:01:41 +0000
committer: Michael Schuller <michael.schuller@artlogic.net> 2014-03-10 12:20:38 +0000
commit: 480e52fddf28fad591f3214ee28c2d2af8842ce1 (patch)
tree: ad9fe3c141c22113769a7f50b588f1d8c6156819 /sqlparse/utils.py
parent: ff7ba6404342898616be24115f7be4744520289d (diff)
download: sqlparse-480e52fddf28fad591f3214ee28c2d2af8842ce1.tar.gz
1 files changed, 43 insertions, 0 deletions
diff --git a/sqlparse/utils.py b/sqlparse/utils.py
index cdf27b1..2a7fb46 100644
--- a/sqlparse/utils.py
+++ b/sqlparse/utils.py
@@ -94,3 +94,46 @@ def memoize_generator(func):
                 yield item
 
     return wrapped_func
+
+def split_unquoted_newlines(text):
+    """Split a string on all unquoted newlines
+
+    This is a fairly simplistic implementation of splitting a string on all
+    unquoted CR, LF, or CR+LF occurrences. Only iterates the string once. Seemed
+    easier than a complex regular expression.
+    """
+    lines = ['']
+    quoted = None
+    escape_next = False
+    last_char = None
+    for c in text:
+        escaped = False
+        # If the previous character was an unescaped '\', this character is
+        # escaped.
+        if escape_next:
+            escaped = True
+            escape_next = False
+        # If the current character is '\' and it is not escaped, the next
+        # character is escaped.
+        if c == '\\' and not escaped:
+            escape_next = True
+        # Start a quoted portion if a) we aren't in one already, and b) the
+        # quote isn't escaped.
+        if c in '"\'' and not escaped and not quoted:
+            quoted = c
+        # An escaped quote does not count as the closing match.
+        elif c == quoted and not escaped:
+            quoted = None
+
+        if not quoted and c in ['\r', '\n']:
+            if c == '\n' and last_char == '\r':
+                # LF of a CR+LF pair: the CR already started the new line.
+                pass
+            else:
+                lines.append('')
+        else:
+            lines[-1] += c
+
+        last_char = c
+
+    return lines
+\ No newline at end of file
author	Michael Schuller <michael.schuller@artlogic.net>	2014-03-07 18:01:41 +0000
committer	Michael Schuller <michael.schuller@artlogic.net>	2014-03-10 12:20:38 +0000
commit	480e52fddf28fad591f3214ee28c2d2af8842ce1 (patch)
tree	ad9fe3c141c22113769a7f50b588f1d8c6156819 /sqlparse/utils.py
parent	ff7ba6404342898616be24115f7be4744520289d (diff)
download	sqlparse-480e52fddf28fad591f3214ee28c2d2af8842ce1.tar.gz