# # Licensed to the Apache Software Foundation (ASF) under one # or more contributor license agreements. See the NOTICE file # distributed with this work for additional information # regarding copyright ownership. The ASF licenses this file # to you under the Apache License, Version 2.0 (the # "License"); you may not use this file except in compliance # with the License. You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, # software distributed under the License is distributed on an # "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. # import re TYPES = [] class Type: def __init__(self, name, pattern=None): self.name = name self.pattern = pattern if self.pattern: TYPES.append(self) def __repr__(self): return self.name LBRACE = Type("LBRACE", r"\{") RBRACE = Type("RBRACE", r"\}") COLON = Type("COLON", r":") SEMI = Type("SEMI", r";") SLASH = Type("SLASH", r"/") COMMA = Type("COMMA", r",") NUMBER = Type("NUMBER", r'[+-]?[0-9]*\.?[0-9]+') ID = Type("ID", r'[a-zA-Z_](?:[a-zA-Z0-9_-]*[a-zA-Z0-9_])?') STRING = Type("STRING", r""""(?:[^\\"]|\\.)*"|'(?:[^\\']|\\.)*'""") ESC = Type("ESC", r"\\[^ux]|\\x[0-9a-fA-F][0-9a-fA-F]|\\u[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F]") SYM = Type("SYM", r"[.#*%@$^!+-]") WSPACE = Type("WSPACE", r"[ \n\r\t]+") EOF = Type("EOF") class Token: def __init__(self, type, value, input, position): self.type = type self.value = value self.input = input self.position = position def line_info(self): return line_info(self.input, self.position) def __repr__(self): if self.value is None: return repr(self.type) else: return "%s(%r)" % (self.type, self.value) joined = "|".join(["(%s)" % t.pattern for t in TYPES]) LEXER = re.compile(joined) class LexError(Exception): pass def line_info(st, pos): idx = 0 lineno = 1 column = 0 line_pos = 0 while idx < pos: if st[idx] == "\n": lineno += 1 column = 0 line_pos = idx column += 1 idx += 1 end = st.find("\n", line_pos) if end < 0: end = len(st) line = st[line_pos:end] return line, lineno, column def lex(st): pos = 0 while pos < len(st): m = LEXER.match(st, pos) if m is None: line, ln, col = line_info(st, pos) raise LexError("unrecognized characters line:%s,%s: %s" % (ln, col, line)) else: idx = m.lastindex t = Token(TYPES[idx - 1], m.group(idx), st, pos) yield t pos = m.end() yield Token(EOF, None, st, pos) def tok2str(tok): if tok.type is STRING: return eval(tok.value) elif tok.type is ESC: if tok.value[1] == "x": return eval('"%s"' % tok.value) elif tok.value[1] == "u": return eval('u"%s"' % tok.value) else: return tok.value[1] else: return tok.value def tok2obj(tok): if tok.type in (STRING, NUMBER): return eval(tok.value) else: return tok.value def toks2str(toks): if toks: return "".join(map(tok2str, toks)) else: return None class ParseError(Exception): def __init__(self, token, *expected): line, ln, col = token.line_info() exp = ", ".join(map(str, expected)) if len(expected) > 1: exp = "(%s)" % exp if expected: msg = "expecting %s, got %s line:%s,%s:%s" % (exp, token, ln, col, line) else: msg = "unexpected token %s line:%s,%s:%s" % (token, ln, col, line) Exception.__init__(self, msg) self.token = token self.expected = expected class Parser: def __init__(self, tokens): self.tokens = [t for t in tokens if t.type is not WSPACE] self.idx = 0 def next(self): return self.tokens[self.idx] def matches(self, *types): return self.next().type in types def eat(self, *types): if types and not self.matches(*types): raise ParseError(self.next(), *types) else: t = self.next() self.idx += 1 return t def eat_until(self, *types): result = [] while not self.matches(*types): result.append(self.eat()) return result def parse(self): result = self.address() self.eat(EOF) return result def address(self): name = toks2str(self.eat_until(SLASH, SEMI, EOF)) if name is None: raise ParseError(self.next()) if self.matches(SLASH): self.eat(SLASH) subject = toks2str(self.eat_until(SEMI, EOF)) else: subject = None if self.matches(SEMI): self.eat(SEMI) options = self.map() else: options = None return name, subject, options def map(self): self.eat(LBRACE) result = {} while True: if self.matches(ID): n, v = self.nameval() result[n] = v if self.matches(COMMA): self.eat(COMMA) elif self.matches(RBRACE): break else: raise ParseError(self.next(), COMMA, RBRACE) elif self.matches(RBRACE): break else: raise ParseError(self.next(), ID, RBRACE) self.eat(RBRACE) return result def nameval(self): name = self.eat(ID).value self.eat(COLON) val = self.value() return (name, val) def value(self): if self.matches(NUMBER, STRING, ID): return tok2obj(self.eat()) elif self.matches(LBRACE): return self.map() else: raise ParseError(self.next(), NUMBER, STRING, ID, LBRACE) def parse(addr): return Parser(lex(addr)).parse() __all__ = ["parse", "ParseError"]