""" Tools for constructing patterns. ----- Permission to use, modify, and distribute this software is given under the terms of the NumPy License. See http://scipy.org. NO WARRANTY IS EXPRESSED OR IMPLIED. USE AT YOUR OWN RISK. Author: Pearu Peterson Created: Oct 2006 ----- """ import re class Pattern: """ p1 | p2 -> | p1 + p2 -> p1 & p2 -> ~p1 -> [ ] ~~p1 -> [ ]... ~~~p1 -> [ ]... ~~~~p1 -> ~~~p1 abs(p1) -> whole string match of p1.named(name) -> match of has name p1.match(string) -> return string match with p1.flags() p1.rsplit(..) -> split a string from the rightmost p1 occurrence p1.lsplit(..) -> split a string from the leftmost p1 occurrence """ _special_symbol_map = {'.': '[.]', '*': '[*]', '+': '[+]', '|': '[|]', '(': r'\(', ')': r'\)', '[': r'\[', ']': r'\]', '^': '[^]', '$': '[$]', '?': '[?]', '{': '\{', '}': '\}', '>': '[>]', '<': '[<]', '=': '[=]' } def __init__(self, label, pattern, optional=0, flags=0, value=None): self.label = label self.pattern = pattern self.optional = optional self._flags = flags self.value = value return def flags(self, *flags): f = self._flags for f1 in flags: f = f | f1 return Pattern(self.label, self.pattern, optional=self.optional, flags=f, value=self.value) def get_compiled(self): try: return self._compiled_pattern except AttributeError: self._compiled_pattern = compiled = re.compile(self.pattern, self._flags) return compiled def match(self, string): return self.get_compiled().match(string) def search(self, string): return self.get_compiled().search(string) def rsplit(self, string): """ Return (, , ) where string = lhs + pattern_match + rhs and rhs does not contain pattern_match. If no pattern_match is found in string, return None. """ compiled = self.get_compiled() t = compiled.split(string) if len(t) < 3: return if '' in t[1:-1]: return rhs = t[-1].strip() pattern_match = t[-2].strip() assert abs(self).match(pattern_match),`self,string,t,pattern_match` lhs = (''.join(t[:-2])).strip() return lhs, pattern_match, rhs def lsplit(self, string): """ Return (, , ) where string = lhs + pattern_match + rhs and rhs does not contain pattern_match. If no pattern_match is found in string, return None. """ compiled = self.get_compiled() t = compiled.split(string) # can be optimized if len(t) < 3: return lhs = t[0].strip() pattern_match = t[1].strip() rhs = (''.join(t[2:])).strip() assert abs(self).match(pattern_match),`pattern_match` return lhs, pattern_match, rhs def __abs__(self): return Pattern(self.label, r'\A' + self.pattern+ r'\Z',flags=self._flags, value=self.value) def __repr__(self): return '%s(%r, %r)' % (self.__class__.__name__, self.label, self.pattern) def __or__(self, other): label = '( %s OR %s )' % (self.label, other.label) if self.pattern==other.pattern: pattern = self.pattern flags = self._flags else: pattern = '(%s|%s)' % (self.pattern, other.pattern) flags = self._flags | other._flags return Pattern(label, pattern, flags=flags) def __and__(self, other): if isinstance(other, Pattern): label = '%s%s' % (self.label, other.label) pattern = self.pattern + other.pattern flags = self._flags | other._flags else: assert isinstance(other,str),`other` label = '%s%s' % (self.label, other) pattern = self.pattern + other flags = self._flags return Pattern(label, pattern, flags=flags) def __rand__(self, other): assert isinstance(other,str),`other` label = '%s%s' % (other, self.label) pattern = other + self.pattern return Pattern(label, pattern, flags=self._flags) def __invert__(self): if self.optional: if self.optional==1: return Pattern(self.label + '...', self.pattern[:-1] + '*', optional=2,flags=self._flags) if self.optional==2: return Pattern('%s %s' % (self.label[1:-4].strip(), self.label), self.pattern[:-1] + '+', optional=3, flags=self._flags) return self label = '[ %s ]' % (self.label) pattern = '(%s)?' % (self.pattern) return Pattern(label, pattern, optional=1, flags=self._flags) def __add__(self, other): if isinstance(other, Pattern): label = '%s %s' % (self.label, other.label) pattern = self.pattern + r'\s*' + other.pattern flags = self._flags | other._flags else: assert isinstance(other,str),`other` label = '%s %s' % (self.label, other) other = self._special_symbol_map.get(other, other) pattern = self.pattern + r'\s*' + other flags = self._flags return Pattern(label, pattern, flags = flags) def __radd__(self, other): assert isinstance(other,str),`other` label = '%s %s' % (other, self.label) other = self._special_symbol_map.get(other, other) pattern = other + r'\s*' + self.pattern return Pattern(label, pattern, flags=self._flags) def named(self, name = None): if name is None: label = self.label assert label[0]+label[-1]=='<>' and ' ' not in label,`label` else: label = '<%s>' % (name) pattern = '(?P%s%s)' % (label.replace('-','_'), self.pattern) return Pattern(label, pattern, flags=self._flags, value= self.value) def rename(self, label): if label[0]+label[-1]!='<>': label = '<%s>' % (label) return Pattern(label, self.pattern, optional=self.optional, flags=self._flags, value=self.value) def __call__(self, string): m = self.match(string) if m is None: return if self.value is not None: return self.value return m.group() # Predefined patterns letter = Pattern('','[A-Z]',flags=re.I) name = Pattern('', r'[A-Z]\w*',flags=re.I) digit = Pattern('',r'\d') underscore = Pattern('', '_') binary_digit = Pattern('',r'[01]') octal_digit = Pattern('',r'[0-7]') hex_digit = Pattern('',r'[\dA-F]',flags=re.I) digit_string = Pattern('',r'\d+') binary_digit_string = Pattern('',r'[01]+') octal_digit_string = Pattern('',r'[0-7]+') hex_digit_string = Pattern('',r'[\dA-F]+',flags=re.I) sign = Pattern('',r'[+-]') exponent_letter = Pattern('',r'[ED]',flags=re.I) alphanumeric_character = Pattern('',r'\w') # [A-Z0-9_] special_character = Pattern('',r'[ =+-*/\()[\]{},.:;!"%&~<>?,\'`^|$#@]') character = alphanumeric_character | special_character kind_param = digit_string | name kind_param_named = kind_param.named('kind-param') signed_digit_string = ~sign + digit_string int_literal_constant = digit_string + ~('_' + kind_param) signed_int_literal_constant = ~sign + int_literal_constant int_literal_constant_named = digit_string.named('value') + ~ ('_' + kind_param_named) signed_int_literal_constant_named = (~sign + digit_string).named('value') + ~ ('_' + kind_param_named) binary_constant = ('B' + ("'" & binary_digit_string & "'" | '"' & binary_digit_string & '"')).flags(re.I) octal_constant = ('O' + ("'" & octal_digit_string & "'" | '"' & octal_digit_string & '"')).flags(re.I) hex_constant = ('Z' + ("'" & hex_digit_string & "'" | '"' & hex_digit_string & '"')).flags(re.I) boz_literal_constant = binary_constant | octal_constant | hex_constant exponent = signed_digit_string significand = digit_string + '.' + ~digit_string | '.' + digit_string real_literal_constant = significand + ~(exponent_letter + exponent) + ~ ('_' + kind_param) | \ digit_string + exponent_letter + exponent + ~ ('_' + kind_param) real_literal_constant_named = (significand + ~(exponent_letter + exponent) |\ digit_string + exponent_letter + exponent).named('value') + ~ ('_' + kind_param_named) signed_real_literal_constant_named = (~sign + (significand + ~(exponent_letter + exponent) |\ digit_string + exponent_letter + exponent)).named('value') + ~ ('_' + kind_param_named) signed_real_literal_constant = ~sign + real_literal_constant named_constant = name real_part = signed_int_literal_constant | signed_real_literal_constant | named_constant imag_part = real_part complex_literal_constant = '(' + real_part + ',' + imag_part + ')' a_n_rep_char = Pattern('',r'\w') rep_char = Pattern('',r'.') char_literal_constant = ~( kind_param + '_') + ("'" + ~~rep_char + "'" | '"' + ~~rep_char + '"' ) a_n_char_literal_constant_named1 = ~( kind_param_named + '_') + (~~~("'" + ~~a_n_rep_char + "'" )).named('value') a_n_char_literal_constant_named2 = ~( kind_param_named + '_') + (~~~('"' + ~~a_n_rep_char + '"' )).named('value') logical_literal_constant = ('[.](TRUE|FALSE)[.]' + ~ ('_' + kind_param)).flags(re.I) logical_literal_constant_named = Pattern('',r'[.](TRUE|FALSE)[.]',flags=re.I).named() + ~ ('_' + kind_param_named) literal_constant = int_literal_constant | real_literal_constant | complex_literal_constant | logical_literal_constant | char_literal_constant | boz_literal_constant constant = literal_constant | named_constant int_constant = int_literal_constant | boz_literal_constant | named_constant char_constant = char_literal_constant | named_constant # assume that replace_string_map is applied: part_ref = name + ~((r'[(]' + name + r'[)]')) data_ref = part_ref + ~~~(r'[%]' + part_ref) primary = constant | name | data_ref | (r'[(]' + name + r'[)]') power_op = Pattern('',r'(?',r'(?',r'[+-]') concat_op = Pattern('',r'(?','[.]EQ[.]|[.]NE[.]|[.]LT[.]|[.]LE[.]|[.]GT[.]|[.]GE[.]|[=]{2}|/[=]|[<][=]|[<]|[>][=]|[>]',flags=re.I) not_op = Pattern('','[.]NOT[.]',flags=re.I) and_op = Pattern('','[.]AND[.]',flags=re.I) or_op = Pattern('','[.]OR[.]',flags=re.I) equiv_op = Pattern('','[.]EQV[.]|[.]NEQV[.]',flags=re.I) percent_op = Pattern('',r'%',flags=re.I) intrinsic_operator = power_op | mult_op | add_op | concat_op | rel_op | not_op | and_op | or_op | equiv_op extended_intrinsic_operator = intrinsic_operator defined_unary_op = Pattern('','[.][A-Z]+[.]',flags=re.I) defined_binary_op = Pattern('','[.][A-Z]+[.]',flags=re.I) defined_operator = defined_unary_op | defined_binary_op | extended_intrinsic_operator abs_defined_operator = abs(defined_operator) defined_op = Pattern('','[.][A-Z]+[.]',flags=re.I) abs_defined_op = abs(defined_op) non_defined_binary_op = intrinsic_operator | logical_literal_constant label = Pattern('