1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
|
'''
Created on 17/05/2012
@author: piranna
'''
try:
from collections import OrderedDict
except ImportError:
OrderedDict = None
if OrderedDict:
class Cache(OrderedDict):
"""Cache with LRU algorithm using an OrderedDict as basis
"""
def __init__(self, maxsize=100):
OrderedDict.__init__(self)
self._maxsize = maxsize
def __getitem__(self, key, *args, **kwargs):
# Get the key and remove it from the cache, or raise KeyError
value = OrderedDict.__getitem__(self, key)
del self[key]
# Insert the (key, value) pair on the front of the cache
OrderedDict.__setitem__(self, key, value)
# Return the value from the cache
return value
def __setitem__(self, key, value, *args, **kwargs):
# Key was inserted before, remove it so we put it at front later
if key in self:
del self[key]
# Too much items on the cache, remove the least recent used
elif len(self) >= self._maxsize:
self.popitem(False)
# Insert the (key, value) pair on the front of the cache
OrderedDict.__setitem__(self, key, value, *args, **kwargs)
else:
class Cache(dict):
"""Cache that reset when gets full
"""
def __init__(self, maxsize=100):
dict.__init__(self)
self._maxsize = maxsize
def __setitem__(self, key, value, *args, **kwargs):
# Reset the cache if we have too much cached entries and start over
if len(self) >= self._maxsize:
self.clear()
# Insert the (key, value) pair on the front of the cache
dict.__setitem__(self, key, value, *args, **kwargs)
def memoize_generator(func):
"""Memoize decorator for generators
Store `func` results in a cache according to their arguments as 'memoize'
does but instead this works on decorators instead of regular functions.
Obviusly, this is only useful if the generator will always return the same
values for each specific parameters...
"""
cache = Cache()
def wrapped_func(*args, **kwargs):
# params = (args, kwargs)
params = (args, tuple(sorted(kwargs.items())))
# Look if cached
try:
cached = cache[params]
# Not cached, exec and store it
except KeyError:
cached = []
for item in func(*args, **kwargs):
cached.append(item)
yield item
cache[params] = cached
# Cached, yield its items
else:
for item in cached:
yield item
return wrapped_func
def split_unquoted_newlines(text):
"""Split a string on all unquoted newlines
This is a fairly simplistic implementation of splitting a string on all
unescaped CR, LF, or CR+LF occurences. Only iterates the string once. Seemed
easier than a complex regular expression.
"""
lines = ['']
quoted = None
escape_next = False
last_char = None
for c in text:
escaped = False
# If the previous character was an unescpaed '\', this character is
# escaped.
if escape_next:
escaped = True
escape_next = False
# If the current character is '\' and it is not escaped, the next
# character is escaped.
if c == '\\' and not escaped:
escape_next = True
# Start a quoted portion if a) we aren't in one already, and b) the
# quote isn't escaped.
if c in '"\'' and not escaped and not quoted:
quoted = c
# Escaped quotes (obvs) don't count as a closing match.
elif c == quoted and not escaped:
quoted = None
if not quoted and c in ['\r', '\n']:
if c == '\n' and last_char == '\r':
# It's a CR+LF, so don't append another line
pass
else:
lines.append('')
else:
lines[-1] += c
last_char = c
return lines
|