#!/usr/bin/env python
"""
Defines LineSplitter and helper functions.

-----
Permission to use, modify, and distribute this software is given under the
terms of the NumPy License. See http://scipy.org.

NO WARRANTY IS EXPRESSED OR IMPLIED.  USE AT YOUR OWN RISK.
Author: Pearu Peterson
Created: May 2006
-----
"""

__all__ = ['String', 'string_replace_map', 'splitquote', 'splitparen']

import re


class String(str):
    """Marker type for items that are (possibly incomplete) quoted strings."""
    pass


class ParenString(str):
    """Marker type for items that are (possibly incomplete) paren groups."""
    pass


def split2(line, lower=False):
    """
    Split line into non-string part and into a start of a string part.
    Returns 2-tuple. The second item either is empty string or start
    of a string part.
    """
    return LineSplitter(line, lower=lower).split2()


_f2py_str_findall = re.compile(r"_F2PY_STRING_CONSTANT_\d+_").findall
_is_name = re.compile(r'\w*\Z', re.I).match
_is_simple_str = re.compile(r'\w*\Z', re.I).match
_f2py_key_re = re.compile(
    r'(_F2PY_STRING_CONSTANT_\d+_|F2PY_EXPR_TUPLE_\d+)')
_f2py_findall = _f2py_key_re.findall


class string_replace_dict(dict):
    """
    Dictionary object that is callable for applying map returned
    by string_replace_map() function.
    """

    def __call__(self, line):
        """
        Return `line` with every replacement symbol substituted by the text
        stored for it in this mapping.

        Raises KeyError if `line` contains a symbol that is not a key of
        this mapping.
        """
        # Substitute in a single regex pass.  Replacing key by key with
        # str.replace() would corrupt longer symbols that share a prefix
        # with a shorter one (F2PY_EXPR_TUPLE_1 vs F2PY_EXPR_TUPLE_10).
        return _f2py_key_re.sub(lambda match: self[match.group()], line)


def string_replace_map(line, lower=False,
                       _cache={'index': 0, 'pindex': 0}):
    """
    1) Replaces string constants with symbol `'_F2PY_STRING_CONSTANT_<index>_'`
    2) Replaces (expression) with symbol `(F2PY_EXPR_TUPLE_<index>)`

    Strings and parenthesized groups whose inner text consists of word
    characters only (or is empty) are left untouched.

    `lower` is passed on to splitquote(): when true, the parts of the line
    outside of string constants are lowercased.

    `_cache` is a deliberately shared mutable default: it holds the
    module-wide counters that make every generated symbol unique across
    calls.  Callers are not expected to pass it.

    Returns a new line and the replacement map (a string_replace_dict).
    Calling the map with the new line restores the replaced texts.
    """
    items = []
    string_map = string_replace_dict()
    # Every occurrence gets its own symbol.  No attempt is made to reuse a
    # symbol for equal texts: a reverse lookup keyed by inner text can be
    # hit by a *delimited* item (e.g. "(a+b)" after "((a+b))"), which
    # makes the restored line differ from the input.
    for item in splitquote(line, lower=lower)[0]:
        if isinstance(item, String) and not _is_simple_str(item[1:-1]):
            _cache['index'] += 1
            key = "_F2PY_STRING_CONSTANT_%s_" % (_cache['index'])
            string_map[key] = item[1:-1]
            # Keep the original delimiters around the symbol.
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)
    newline = ''.join(items)

    items = []
    expr_keys = []
    for item in splitparen(newline):
        if isinstance(item, ParenString) and not _is_name(item[1:-1]):
            _cache['pindex'] += 1
            key = 'F2PY_EXPR_TUPLE_%s' % (_cache['pindex'])
            string_map[key] = item[1:-1].strip()
            expr_keys.append(key)
            items.append(item[0] + key + item[-1])
        else:
            items.append(item)

    # String symbols that ended up inside a replaced expression are expanded
    # into the expression text and dropped from the map, since they no
    # longer occur in the returned line.
    found_keys = set()
    for k in expr_keys:
        v = string_map[k]
        str_keys = _f2py_str_findall(v)
        if str_keys:
            found_keys.update(str_keys)
            for k1 in str_keys:
                v = v.replace(k1, string_map[k1])
            string_map[k] = v
    for k in found_keys:
        del string_map[k]
    return ''.join(items), string_map


def splitquote(line, stopchar=None, lower=False, quotechars='"\''):
    """
    Fast LineSplitter.

    Split `line` into a list of non-string items (plain str) and string
    items (String instances, delimiters included).

    `stopchar` is the quote character of a string continued from a previous
    line, or None when the line does not start inside a string.
    When `lower` is true, the non-string items are lowercased.
    `quotechars` holds the characters that may open a string.

    Returns a 2-tuple (items, stopchar); the returned stopchar is the quote
    character of a string left open at the end of the line, else None.
    """
    items = []
    i = 0
    while True:
        try:
            char = line[i]
            i += 1
        except IndexError:
            break
        l = []
        l_append = l.append
        nofslashes = 0  # length of the current run of backslashes
        if stopchar is None:
            # search for string start
            while True:
                # A quote preceded by an odd number of backslashes is
                # escaped and does not open a string.
                if char in quotechars and not nofslashes % 2:
                    stopchar = char
                    i -= 1  # re-read the quote as the start of the string
                    break
                if char == '\\':
                    nofslashes += 1
                else:
                    nofslashes = 0
                l_append(char)
                try:
                    char = line[i]
                    i += 1
                except IndexError:
                    break
            if not l:
                continue
            item = ''.join(l)
            if lower:
                item = item.lower()
            items.append(item)
            continue
        if char == stopchar:
            # string starts with quotechar
            l_append(char)
            try:
                char = line[i]
                i += 1
            except IndexError:
                if l:
                    item = String(''.join(l))
                    items.append(item)
                break
        # else continued string
        while True:
            if char == stopchar and not nofslashes % 2:
                l_append(char)
                stopchar = None
                break
            if char == '\\':
                nofslashes += 1
            else:
                nofslashes = 0
            l_append(char)
            try:
                char = line[i]
                i += 1
            except IndexError:
                break
        if l:
            item = String(''.join(l))
            items.append(item)
    return items, stopchar


class LineSplitterBase(object):
    """
    Iterator base class; subclasses provide get_item(), which returns the
    next item or raises StopIteration.  Empty items are skipped.
    """

    def __iter__(self):
        return self

    def next(self):
        item = ''
        while not item:
            item = self.get_item()  # get_item raises StopIteration
        return item

    # Python 3 spelling of the iterator protocol.
    __next__ = next


class LineSplitter(LineSplitterBase):
    """ Splits a line into non strings and strings. E.g.
    abc="123" -> ['abc=','"123"']
    Handles splitting lines with incomplete string blocks.
    """

    def __init__(self, line,
                 quotechar=None,
                 lower=False,
                 ):
        # Characters are stored reversed so that pop()/append() operate on
        # the front of the line.
        self.fifo_line = [c for c in line]
        self.fifo_line.reverse()
        # Quote character of the string currently open, or None.
        self.quotechar = quotechar
        # When true, non-string items are lowercased.
        self.lower = lower

    def split2(self):
        """
        Split line until the first start of a string.

        Returns a 2-tuple: the first item and the unconsumed rest of the
        line.
        """
        try:
            item1 = self.get_item()
        except StopIteration:
            return '', ''
        item2 = ''.join(reversed(self.fifo_line))
        return item1, item2

    def get_item(self):
        """
        Return the next item: a plain str (possibly empty) for a non-string
        part, a String for a string part.  Raises StopIteration when the
        line is exhausted.
        """
        fifo_pop = self.fifo_line.pop
        try:
            char = fifo_pop()
        except IndexError:
            raise StopIteration
        fifo_append = self.fifo_line.append
        quotechar = self.quotechar
        l = []
        l_append = l.append

        nofslashes = 0  # length of the current run of backslashes
        if quotechar is None:
            # search for string start
            while True:
                if char in '"\'' and not nofslashes % 2:
                    self.quotechar = char
                    fifo_append(char)  # push the quote back for next call
                    break
                if char == '\\':
                    nofslashes += 1
                else:
                    nofslashes = 0
                l_append(char)
                try:
                    char = fifo_pop()
                except IndexError:
                    break
            item = ''.join(l)
            if self.lower:
                item = item.lower()
            return item

        if char == quotechar:
            # string starts with quotechar
            l_append(char)
            try:
                char = fifo_pop()
            except IndexError:
                return String(''.join(l))
        # else continued string
        while True:
            if char == quotechar and not nofslashes % 2:
                l_append(char)
                self.quotechar = None
                break
            if char == '\\':
                nofslashes += 1
            else:
                nofslashes = 0
            l_append(char)
            try:
                char = fifo_pop()
            except IndexError:
                break
        return String(''.join(l))


def splitparen(line, paren='()'):
    """
    Fast LineSplitterParen.

    Split `line` into a list alternating between plain str items and
    ParenString items (outermost parenthesized groups, delimiters
    included).  `paren` is a 2-character string with the opening and
    closing delimiter.

    Note that, unlike LineSplitterParen, the plain item in front of a group
    is emitted even when it is empty.
    """
    stopchar = None
    startchar, endchar = paren[0], paren[1]

    items = []
    i = 0
    while True:
        try:
            char = line[i]
            i += 1
        except IndexError:
            break
        nofslashes = 0  # length of the current run of backslashes
        l = []
        l_append = l.append
        if stopchar is None:
            # search for parenthesis start
            while True:
                # A delimiter preceded by an odd number of backslashes is
                # escaped and does not open a group.
                if char == startchar and not nofslashes % 2:
                    stopchar = endchar
                    i -= 1  # re-read the delimiter as the group start
                    break
                if char == '\\':
                    nofslashes += 1
                else:
                    nofslashes = 0
                l_append(char)
                try:
                    char = line[i]
                    i += 1
                except IndexError:
                    break
            item = ''.join(l)
        else:
            nofstarts = 0  # current nesting depth
            while True:
                if char == stopchar and not nofslashes % 2 and nofstarts == 1:
                    l_append(char)
                    stopchar = None
                    break
                if char == '\\':
                    nofslashes += 1
                else:
                    nofslashes = 0
                if char == startchar:
                    nofstarts += 1
                elif char == endchar:
                    nofstarts -= 1
                l_append(char)
                try:
                    char = line[i]
                    i += 1
                except IndexError:
                    break
            item = ParenString(''.join(l))
        items.append(item)
    return items


class LineSplitterParen(LineSplitterBase):
    """ Splits a line into strings and strings with parenthesis. E.g.
    a(x) = b(c,d) -> ['a','(x)',' = b','(c,d)']
    """

    def __init__(self, line, paren='()'):
        # Characters are stored reversed so that pop()/append() operate on
        # the front of the line.
        self.fifo_line = [c for c in line]
        self.fifo_line.reverse()
        self.startchar = paren[0]
        self.endchar = paren[1]
        # Closing delimiter of the group currently open, or None.
        self.stopchar = None

    def get_item(self):
        """
        Return the next item: a plain str (possibly empty) for text outside
        of parentheses, a ParenString for a parenthesized group.  Raises
        StopIteration when the line is exhausted.
        """
        fifo_pop = self.fifo_line.pop
        try:
            char = fifo_pop()
        except IndexError:
            raise StopIteration
        fifo_append = self.fifo_line.append
        startchar = self.startchar
        endchar = self.endchar
        stopchar = self.stopchar
        l = []
        l_append = l.append

        nofslashes = 0  # length of the current run of backslashes
        if stopchar is None:
            # search for parenthesis start
            while True:
                if char == startchar and not nofslashes % 2:
                    self.stopchar = endchar
                    fifo_append(char)  # push the delimiter back
                    break
                if char == '\\':
                    nofslashes += 1
                else:
                    nofslashes = 0
                l_append(char)
                try:
                    char = fifo_pop()
                except IndexError:
                    break
            item = ''.join(l)
            return item

        nofstarts = 0  # current nesting depth
        while True:
            if char == stopchar and not nofslashes % 2 and nofstarts == 1:
                l_append(char)
                self.stopchar = None
                break
            if char == '\\':
                nofslashes += 1
            else:
                nofslashes = 0
            if char == startchar:
                nofstarts += 1
            elif char == endchar:
                nofstarts -= 1
            l_append(char)
            try:
                char = fifo_pop()
            except IndexError:
                break
        return ParenString(''.join(l))


def test():
    """Self-test of the splitters; run when the module is executed."""
    splitter = LineSplitter('abc\\\' def"12\\"3""56"dfad\'a d\'')
    l = [item for item in splitter]
    assert l == ['abc\\\' def', '"12\\"3"', '"56"', 'dfad', '\'a d\''], repr(l)
    assert splitter.quotechar is None
    l, stopchar = splitquote('abc\\\' def"12\\"3""56"dfad\'a d\'')
    assert l == ['abc\\\' def', '"12\\"3"', '"56"', 'dfad', '\'a d\''], repr(l)
    assert stopchar is None

    splitter = LineSplitter('"abc123&')
    l = [item for item in splitter]
    assert l == ['"abc123&'], repr(l)
    assert splitter.quotechar == '"'
    l, stopchar = splitquote('"abc123&')
    assert l == ['"abc123&'], repr(l)
    assert stopchar == '"'

    splitter = LineSplitter(' &abc"123', '"')
    l = [item for item in splitter]
    assert l == [' &abc"', '123']
    assert splitter.quotechar is None
    l, stopchar = splitquote(' &abc"123', '"')
    assert l == [' &abc"', '123']
    assert stopchar is None

    l = split2('')
    assert l == ('', ''), repr(l)
    l = split2('12')
    assert l == ('12', ''), repr(l)
    l = split2('1"a"//"b"')
    assert l == ('1', '"a"//"b"'), repr(l)
    l = split2('"ab"')
    assert l == ('', '"ab"'), repr(l)

    splitter = LineSplitterParen('a(b) = b(x,y(1)) b\\((a)\\)')
    l = [item for item in splitter]
    assert l == ['a', '(b)', ' = b', '(x,y(1))', ' b\\(', '(a)', '\\)'], repr(l)
    l = splitparen('a(b) = b(x,y(1)) b\\((a)\\)')
    assert l == ['a', '(b)', ' = b', '(x,y(1))', ' b\\(', '(a)', '\\)'], repr(l)

    l = string_replace_map('a()')
    print(l)
    print('ok')


if __name__ == '__main__':
    test()