diff options
Diffstat (limited to 'pygments/scanner.py')
| -rw-r--r-- | pygments/scanner.py | 105 | 
1 files changed, 105 insertions, 0 deletions
# -*- coding: utf-8 -*-
"""
    pygments.scanner
    ~~~~~~~~~~~~~~~~

    This library implements a regex based scanner. Some languages
    like Pascal are easy to parse but have some keywords that
    depend on the context. Because of this it's impossible to lex
    that just by using a regular expression lexer like the
    `RegexLexer`.

    Have a look at the `DelphiLexer` to get an idea of how to use
    this scanner.

    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re


class EndOfText(RuntimeError):
    """
    Raised if the end of the text is reached and the user
    tried to call a match function.
    """


class Scanner(object):
    """
    Simple scanner

    All method patterns are regular expression strings (not
    compiled expressions!)
    """

    def __init__(self, text, flags=0):
        """
        :param text:    The text which should be scanned
        :param flags:   default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        # Maps pattern string -> compiled regex (compiled with ``flags``).
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # A decorator is used instead of ``property(eos, eos.__doc__)``:
        # the second positional argument of ``property`` is the setter,
        # so the docstring was being installed as ``fset``.
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled regex for `pattern`, caching it per scanner."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = self._re_cache[pattern] = re.compile(pattern,
                                                            self.flags)
            return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object, or `None` if it does not match.
        (Doesn't touch pos). Use this for lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        Note that ``last`` is updated to the current ``match`` even
        when the pattern does not match; ``match``, ``start_pos`` and
        ``pos`` are only updated on success.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        Uses the pattern ``.``, so a newline is only consumed when the
        scanner's flags include ``re.DOTALL``.

        :raises EndOfText: if the scanner is already at the end of text
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )
