1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
|
'''
Created on 17/05/2012
@author: piranna
'''
import re
from collections import OrderedDict
class Cache(OrderedDict):
    """Cache with LRU algorithm using an OrderedDict as basis.

    The least recently used entry sits at the front of the ordering and the
    most recently used one at the back.  Both reading and writing a key move
    it to the back, so the entry evicted when the cache is full is always the
    one that has gone unused for the longest time.

    :param maxsize: maximum number of entries kept.  A value of zero or less
        disables storage entirely: assignments are silently discarded instead
        of failing.
    """

    def __init__(self, maxsize=100):
        OrderedDict.__init__(self)
        self._maxsize = maxsize

    def __getitem__(self, key, *args, **kwargs):
        """Return the value stored for `key` and mark it as most recent.

        Raises KeyError when `key` is not cached.  Extra positional and
        keyword arguments are accepted for signature compatibility and
        ignored.
        """
        # Raises KeyError for unknown keys before anything is modified
        value = OrderedDict.__getitem__(self, key)

        # Delete and re-insert so the key ends up as the most recent entry
        del self[key]
        OrderedDict.__setitem__(self, key, value)

        return value

    def __setitem__(self, key, value, *args, **kwargs):
        """Store `value` under `key`, evicting the oldest entry when full."""
        if key in self:
            # Known key: drop the old entry so the new one becomes the most
            # recent; the size does not grow, so nothing has to be evicted
            del self[key]

        elif self._maxsize <= 0:
            # A cache without capacity keeps nothing.  Without this guard
            # popitem() below would raise KeyError on the empty dictionary.
            return

        elif len(self) >= self._maxsize:
            # Cache is full: discard the least recently used entry
            self.popitem(last=False)

        OrderedDict.__setitem__(self, key, value, *args, **kwargs)
def memoize_generator(func):
    """Memoize decorator for generators.

    Store `func` results in a cache according to their arguments as 'memoize'
    does, but this works on generators instead of regular functions.

    Obviously, this is only useful if the generator will always return the same
    values for each specific parameters...

    The items produced by a call are recorded while they are being yielded and
    are only stored once the generator has been consumed to the end; a
    partially consumed generator leaves the cache untouched.  All positional
    and keyword arguments must be hashable, since they form the cache key.
    """
    # Imported locally so this decorator does not rely on a module-level import
    from functools import wraps

    cache = Cache()

    @wraps(func)  # keep the name and docstring of the decorated generator
    def wrapped_func(*args, **kwargs):
        # Keyword arguments are sorted so their order does not affect the key
        params = (args, tuple(sorted(kwargs.items())))

        # Look if cached
        try:
            cached = cache[params]

        # Not cached, exec and store it
        except KeyError:
            cached = []

            for item in func(*args, **kwargs):
                cached.append(item)
                yield item

            # Only reached when the underlying generator was exhausted
            cache[params] = cached

        # Cached, yield its items
        else:
            for item in cached:
                yield item

    return wrapped_func
# This regular expression replaces the home-cooked parser that was here before.
# It is much faster, but requires an extra post-processing step to get the
# desired results (that are compatible with what you would expect from the
# str.splitlines() method).
#
# It matches groups of characters: newlines, quoted strings, or unquoted text,
# and splits on that basis. The post-processing step puts those back together
# into the actual lines of SQL.
#
# The whole pattern sits inside a capturing group, so re.split() returns the
# matched chunks themselves, interleaved with whatever text fell between two
# matches (often just empty strings).
SPLIT_REGEX = re.compile(r"""
(
(?: # Start of non-capturing group
(?:\r\n|\r|\n) | # Match any single newline, or
[^\r\n'"]+ | # Match any character series without quotes or
# newlines, or
"(?:[^"\\]|\\.)*" | # Match double-quoted strings, or
'(?:[^'\\]|\\.)*' # Match single quoted strings
)
)
""", re.VERBOSE)
# Matches a line terminator (CR+LF, CR or LF) at the start of a string; used
# to recognize the newline chunks produced by SPLIT_REGEX.
LINE_MATCH = re.compile(r'(\r\n|\r|\n)')
def split_unquoted_newlines(text):
    """Split a string on all unquoted newlines.

    Unlike str.splitlines(), this will ignore CR/LF/CR+LF if the requisite
    character is inside of a string.

    :param text: the text to split.
    :returns: list with the lines of `text`, line terminators removed.  The
        list always holds at least one element; a text ending in a newline
        yields a trailing empty string.
    """
    lines = []
    # Pieces of the line currently being assembled; joined once per line
    # instead of growing a string with repeated concatenation
    parts = []

    for chunk in SPLIT_REGEX.split(text):
        # re.split() produces empty strings between adjacent matches
        if not chunk:
            continue

        if LINE_MATCH.match(chunk):
            # Unquoted newline: the current line is complete
            lines.append(''.join(parts))
            parts = []
        else:
            # Unquoted text or a quoted string (which may embed newlines)
            parts.append(chunk)

    # Whatever follows the last newline (possibly nothing) is the last line
    lines.append(''.join(parts))
    return lines
|