diff options
author | Matthäus G. Chajdas <dev@anteru.net> | 2019-11-10 13:56:53 +0100 |
---|---|---|
committer | Matthäus G. Chajdas <dev@anteru.net> | 2019-11-10 13:56:53 +0100 |
commit | 1dd3124a9770e11b6684e5dd1e6bc15a0aa3bc67 (patch) | |
tree | 87a171383266dd1f64196589af081bc2f8e497c3 /pygments/scanner.py | |
parent | f1c080e184dc1bbc36eaa7cd729ff3a499de568a (diff) | |
download | pygments-master.tar.gz |
Diffstat (limited to 'pygments/scanner.py')
-rw-r--r-- | pygments/scanner.py | 105 |
1 files changed, 0 insertions, 105 deletions
# -*- coding: utf-8 -*-
# Reconstructed from the collapsed deleted-file diff:
#   diff --git a/pygments/scanner.py b/pygments/scanner.py
#   deleted file mode 100644, index bcb19ed9..00000000, hunk @@ -1,105 +0,0 @@
"""
    pygments.scanner
    ~~~~~~~~~~~~~~~~

    This library implements a regex based scanner. Some languages
    like Pascal are easy to parse but have some keywords that
    depend on the context. Because of this it's impossible to lex
    that just by using a regular expression lexer like the
    `RegexLexer`.

    Have a look at the `DelphiLexer` to get an idea of how to use
    this scanner.

    :copyright: Copyright 2006-2019 by the Pygments team, see AUTHORS.
    :license: BSD, see LICENSE for details.
"""
import re


class EndOfText(RuntimeError):
    """
    Raised when the end of the text has been reached and the user
    tried to call a match function.
    """


class Scanner(object):
    """
    Simple scanner

    All method patterns are regular expression strings (not
    compiled expressions!)
    """

    def __init__(self, text, flags=0):
        """
        :param text: The text which should be scanned
        :param flags: default regular expression flags
        """
        self.data = text
        self.data_length = len(text)
        self.start_pos = 0
        self.pos = 0
        self.flags = flags
        self.last = None
        self.match = None
        # Maps pattern string -> compiled regex (compiled with ``flags``).
        self._re_cache = {}

    @property
    def eos(self):
        """`True` if the scanner reached the end of text."""
        # A decorator is used instead of ``property(eos, eos.__doc__)``:
        # the second positional argument of ``property`` is the setter,
        # so the old spelling installed the docstring as ``fset``.
        return self.pos >= self.data_length

    def _compile(self, pattern):
        """Return the compiled form of `pattern`, caching it per scanner."""
        try:
            return self._re_cache[pattern]
        except KeyError:
            compiled = self._re_cache[pattern] = re.compile(pattern, self.flags)
            return compiled

    def check(self, pattern):
        """
        Apply `pattern` on the current position and return
        the match object. (Doesn't touch pos). Use this for
        lookahead.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        return self._compile(pattern).match(self.data, self.pos)

    def test(self, pattern):
        """Apply a pattern on the current position and check
        if it matches. Doesn't touch pos.

        :raises EndOfText: if the scanner is already at the end of text
        """
        return self.check(pattern) is not None

    def scan(self, pattern):
        """
        Scan the text for the given pattern and update pos/match
        and related fields. The return value is a boolean that
        indicates if the pattern matched. The matched value is
        stored on the instance as ``match``, the last value is
        stored as ``last``. ``start_pos`` is the position of the
        pointer before the pattern was matched, ``pos`` is the
        end position.

        :raises EndOfText: if the scanner is already at the end of text
        """
        if self.eos:
            raise EndOfText()
        regex = self._compile(pattern)
        # ``last`` is overwritten before the match attempt, so it is
        # updated even when the pattern does not match.
        self.last = self.match
        m = regex.match(self.data, self.pos)
        if m is None:
            return False
        self.start_pos = m.start()
        self.pos = m.end()
        self.match = m.group()
        return True

    def get_char(self):
        """Scan exactly one char.

        The pattern used is ``'.'``, so a newline is only consumed when
        the scanner was created with ``re.DOTALL``. Nothing is returned;
        the character is available as ``match`` afterwards.
        """
        self.scan('.')

    def __repr__(self):
        return '<%s %d/%d>' % (
            self.__class__.__name__,
            self.pos,
            self.data_length
        )