1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
|
# -*- coding: utf-8 -*-
"""
Basic Grammar Notation Tests
~~~~~~~~~~~~~~~~~~~~~~~~~~~~
:copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import pytest
from pygments.token import Token
from pygments.lexers import PegLexer
@pytest.fixture(scope='module')
def lexer_peg():
    """Provide a single ``PegLexer`` instance for the tests in this module."""
    peg_lexer = PegLexer()
    yield peg_lexer
def test_peg_basic(lexer_peg):
    """Lex a rule written without whitespace and compare the full token stream.

    The input combines a rule name, the ``<-`` operator, a parenthesised
    group, a double-quoted terminal, ``/`` alternation, a nonterminal, a
    bracketed character class and a trailing ``*``.
    """
    # Short aliases keep the expected-token table compact.
    name = Token.Name.Class
    operator = Token.Operator
    punctuation = Token.Punctuation

    source = 'rule<-("terminal"/nonterminal/[cls])*\n'
    expected = [
        (name, 'rule'),
        (operator, '<-'),
        (punctuation, '('),
        (Token.String.Double, '"terminal"'),
        (operator, '/'),
        (name, 'nonterminal'),
        (operator, '/'),
        (punctuation, '['),
        (Token.String, 'cls'),
        (punctuation, ']'),
        (punctuation, ')'),
        (operator, '*'),
        (Token.Text, '\n'),
    ]

    produced = list(lexer_peg.get_tokens(source))
    assert produced == expected
def test_peg_operators(lexer_peg):
    """Check that ``=``, ``|``, ``:`` and ``~`` are emitted as operator tokens."""
    # see for example:
    # - https://github.com/gvanrossum/pegen
    # - https://nim-lang.org/docs/pegs.html
    name = Token.Name.Class
    operator = Token.Operator
    space = (Token.Text, ' ')
    newline = (Token.Text, '\n')
    quoted_a = (Token.String.Single, "'a'")
    quoted_b = (Token.String.Single, "'b'")

    # Each case pairs an input fragment with the exact token stream expected
    # for it; the cases are checked in the order they are listed.
    cases = [
        (
            "rule = 'a' | 'b'\n",
            [
                (name, 'rule'),
                space,
                (operator, '='),
                space,
                quoted_a,
                space,
                (operator, '|'),
                space,
                quoted_b,
                newline,
            ],
        ),
        (
            "rule: 'a' ~ 'b'\n",
            [
                (name, 'rule'),
                (operator, ':'),
                space,
                quoted_a,
                space,
                (operator, '~'),
                space,
                quoted_b,
                newline,
            ],
        ),
    ]

    for source, expected in cases:
        assert list(lexer_peg.get_tokens(source)) == expected
def test_peg_modified_strings(lexer_peg):
    """Check strings carrying modifier letters before or after the quotes.

    A prefix (``i"..."``) or suffix (``"..."ilx``) is expected to be part of
    the same double-quoted string token, while a plain quoted string inside
    parentheses stays unmodified.
    """
    # see for example:
    # - http://textx.github.io/Arpeggio/
    # - https://nim-lang.org/docs/pegs.html
    # - https://github.com/erikrose/parsimonious
    double_quoted = Token.String.Double
    space = (Token.Text, ' ')

    source = '~"regex" i"insensitive" "multimod"ilx ("not modified")\n'
    expected = [
        # can't handle parsimonious-style regex while ~ is a cut operator
        (Token.Operator, '~'),
        (double_quoted, '"regex"'),
        space,
        (double_quoted, 'i"insensitive"'),
        space,
        (double_quoted, '"multimod"ilx'),
        space,
        (Token.Punctuation, '('),
        (double_quoted, '"not modified"'),
        (Token.Punctuation, ')'),
        (Token.Text, '\n'),
    ]

    produced = list(lexer_peg.get_tokens(source))
    assert produced == expected
|