Diffstat (limited to 'tests/test_markdown_lexer.py')
-rw-r--r--  tests/test_markdown_lexer.py  525
1 file changed, 525 insertions, 0 deletions
diff --git a/tests/test_markdown_lexer.py b/tests/test_markdown_lexer.py
index 9024bf07..524becd7 100644
--- a/tests/test_markdown_lexer.py
+++ b/tests/test_markdown_lexer.py
@@ -8,6 +8,7 @@
"""
import pytest
+from pygments.token import Generic, Token, String, Keyword, Name
from pygments.lexers.markup import MarkdownLexer
@@ -34,3 +35,527 @@ def test_code_fence_gsm(lexer):
def test_code_fence_gsm_with_no_lexer(lexer):
assert_same_text(lexer, r'```invalid-lexer\nfoo\n```\n')
+
+
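+# ATX headings: '# ...' is tokenized as Generic.Heading; '##' through '######' as Generic.Subheading.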
+def test_invalid_atx_heading(lexer):
+ fragments = (
+ '#',
+ 'a #',
+ '*#',
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Heading
+
+
+def test_atx_heading(lexer):
+ fragments = (
+ '#Heading',
+ '# Heading',
+ '# Another heading',
+ '# Another # heading',
+ '# Heading #',
+ )
+
+ for fragment in fragments:
+ tokens = [
+ (Generic.Heading, fragment),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_atx_subheading(lexer):
+ fragments = (
+ '##',
+ 'a ##',
+ '*##',
+ '####### too many hashes'
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Subheading
+
+
+def test_atx_subheading(lexer):
+ fragments = (
+ '##Subheading',
+ '## Subheading',
+ '### Subheading',
+ '#### Subheading',
+ '##### Subheading',
+ '###### Subheading',
+ '## Another subheading',
+ '## Another ## subheading',
+ '###### Subheading #',
+ '###### Subheading ######',
+ )
+
+ for fragment in fragments:
+ tokens = [
+ (Generic.Subheading, fragment),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
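+# Setext headings: text underlined with '=' is a heading, text underlined with '-' a subheading.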
+def test_invalid_setext_heading(lexer):
+ fragments = (
+ 'Heading\n',
+ 'Heading\n_',
+ 'Heading\n =====',
+ 'Heading\na=====',
+ '=====',
+ '\n=\n',
+ 'Heading\n=====Text'
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Heading
+
+
+def test_setext_heading(lexer):
+ fragments = (
+ 'Heading\n=',
+ 'Heading\n=======',
+ 'Heading\n==========',
+ )
+
+ for fragment in fragments:
+ tokens = [
+ (Generic.Heading, fragment.split('\n')[0]),
+ (Token.Text, '\n'),
+ (Generic.Heading, fragment.split('\n')[1]),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_setext_subheading(lexer):
+ fragments = (
+ 'Subheading\n',
+ 'Subheading\n_',
+ 'Subheading\n -----',
+ 'Subheading\na-----',
+ '-----',
+ '\n-\n',
+ 'Subheading\n-----Text'
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Subheading
+
+
+def test_setext_subheading(lexer):
+ fragments = (
+ 'Subheading\n-',
+ 'Subheading\n----------',
+ 'Subheading\n-----------',
+ )
+
+ for fragment in fragments:
+ tokens = [
+ (Generic.Subheading, fragment.split('\n')[0]),
+ (Token.Text, '\n'),
+ (Generic.Subheading, fragment.split('\n')[1]),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
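+# List items: the marker ('-', '*', '1.') and a task-list checkbox '[ ]' are tokenized as Keyword.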
+def test_task_list(lexer):
+ fragment = '- [ ] sample task'
+ tokens = [
+ (Keyword, '- '),
+ (Keyword, '[ ]'),
+ (Token.Text, ' '),
+ (Token.Text, 'sample'),
+ (Token.Text, ' '),
+ (Token.Text, 'task'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '* [ ] sample task'
+ tokens = [
+ (Keyword, '* '),
+ (Keyword, '[ ]'),
+ (Token.Text, ' '),
+ (Token.Text, 'sample'),
+ (Token.Text, ' '),
+ (Token.Text, 'task'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = ' * [ ] sample task'
+ tokens = [
+ (Token.Text, ' '),
+ (Keyword, '* '),
+ (Keyword, '[ ]'),
+ (Token.Text, ' '),
+ (Token.Text, 'sample'),
+ (Token.Text, ' '),
+ (Token.Text, 'task'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_bulleted_list(lexer):
+ fragment = '* foo\n* bar'
+ tokens = [
+ (Keyword, '*'),
+ (Token.Text, ' '),
+ (Token.Text, 'foo'),
+ (Token.Text, '\n'),
+ (Keyword, '*'),
+ (Token.Text, ' '),
+ (Token.Text, 'bar'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '- foo\n- bar'
+ tokens = [
+ (Keyword, '-'),
+ (Token.Text, ' '),
+ (Token.Text, 'foo'),
+ (Token.Text, '\n'),
+ (Keyword, '-'),
+ (Token.Text, ' '),
+ (Token.Text, 'bar'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '* *foo*\n* bar'
+ tokens = [
+ (Keyword, '*'),
+ (Token.Text, ' '),
+ (Generic.Emph, '*foo*'),
+ (Token.Text, '\n'),
+ (Keyword, '*'),
+ (Token.Text, ' '),
+ (Token.Text, 'bar'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_numbered_list(lexer):
+ fragment = '1. foo\n2. bar'
+ tokens = [
+ (Keyword, '1.'),
+ (Token.Text, ' '),
+ (Token.Text, 'foo'),
+ (Token.Text, '\n'),
+ (Keyword, '2.'),
+ (Token.Text, ' '),
+ (Token.Text, 'bar'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
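+# Block quotes: the leading '> ' is a Keyword and the quoted text is Generic.Emph.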
+def test_quote(lexer):
+ fragment = '> a\n> quote'
+ tokens = [
+ (Keyword, '> '),
+ (Generic.Emph, 'a\n'),
+ (Keyword, '> '),
+ (Generic.Emph, 'quote\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
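+# Code blocks are String.Backtick; a fence with a language name delegates the body to that lexer.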
+def test_invalid_code_block(lexer):
+ fragments = (
+ '```code```',
+ 'prefix not allowed before ```\ncode block\n```'
+ ' code',
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != String.Backtick
+
+
+def test_code_block_fenced_by_backticks(lexer):
+ fragments = (
+ '```\ncode\n```',
+ '```\nmulti\n`line`\ncode\n```',
+ )
+ for fragment in fragments:
+ tokens = [
+ (String.Backtick, fragment),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_code_block_with_language(lexer):
+ fragments = (
+ '```python\nimport this\n```',
+ )
+ for fragment in fragments:
+ tokens = [
+ (String.Backtick, '```'),
+ (String.Backtick, 'python'),
+ (Token.Text, '\n'),
+ (Token.Keyword.Namespace, 'import'),
+ (Token.Text, ' '),
+ (Token.Name.Namespace, 'this'),
+ (Token.Text, '\n'),
+ (String.Backtick, '```'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_code_indented_with_spaces(lexer):
+ fragments = (
+ 'sample:\n\n code\n',
+ )
+ for fragment in fragments:
+ tokens = [
+ (Token.Text, 'sample:'),
+ (Token.Text, '\n\n'),
+ (String.Backtick, ' code\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragments = (
+ 'sample:\n\n\tcode\n',
+ )
+ for fragment in fragments:
+ tokens = [
+ (Token.Text, 'sample:'),
+ (Token.Text, '\n\n'),
+ (String.Backtick, '\tcode\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
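+# Inline code spans between single backticks are String.Backtick.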
+def test_inline_code(lexer):
+ fragment = 'code: `code`'
+ tokens = [
+ (Token.Text, 'code:'),
+ (Token.Text, ' '),
+ (String.Backtick, '`code`'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = ' `**code**`'
+ tokens = [
+ (Token.Text, ' '),
+ (String.Backtick, '`**code**`'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '(`code`)'
+ tokens = [
+ (Token.Text, '('),
+ (String.Backtick, '`code`'),
+ (Token.Text, ')'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
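+# Bold (Generic.Strong) and italics (Generic.Emph) need matching delimiters on a single line.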
+def test_invalid_bold(lexer):
+ fragments = (
+ '**no bold__',
+ '__no bold**',
+ '*no bold*',
+ '_no bold_',
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Strong
+
+
+def test_bold_fenced_by_asterisk(lexer):
+ fragment = '**bold**'
+ tokens = [
+ (Generic.Strong, '**bold**'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_bold_fenced_by_underscore(lexer):
+ fragment = '__bold__'
+ tokens = [
+ (Generic.Strong, '__bold__'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_italics(lexer):
+ fragments = (
+ '*no italics_',
+ '_no italics*',
+ '**no italics**',
+ '__no italics__',
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Emph
+
+
+def test_italics_fenced_by_asterisk(lexer):
+ fragment = '*italics*'
+ tokens = [
+ (Generic.Emph, '*italics*'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_italics_fenced_by_underscore(lexer):
+ fragment = '_italics_'
+ tokens = [
+ (Generic.Emph, '_italics_'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_escape_italics(lexer):
+ fragments = (
+ r'\*no italics\*',
+ r'\_ no italics \_',
+ )
+
+ for fragment in fragments:
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Emph
+
+
+def test_italics_no_multiline(lexer):
+ fragment = '*no\nitalics*'
+
+ for token, _ in lexer.get_tokens(fragment):
+ assert token != Generic.Emph
+
+
+def test_italics_and_bold(lexer):
+ fragment = '**bold** and *italics*'
+ tokens = [
+ (Generic.Strong, '**bold**'),
+ (Token.Text, ' '),
+ (Token.Text, 'and'),
+ (Token.Text, ' '),
+ (Generic.Emph, '*italics*'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '*italics* and **bold**'
+ tokens = [
+ (Generic.Emph, '*italics*'),
+ (Token.Text, ' '),
+ (Token.Text, 'and'),
+ (Token.Text, ' '),
+ (Generic.Strong, '**bold**'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
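+# Strikethrough text between '~~' markers is tokenized as Generic.Deleted.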
+def test_strikethrough(lexer):
+ fragment = '~~striked~~not striked'
+ tokens = [
+ (Generic.Deleted, '~~striked~~'),
+ (Token.Text, 'not'),
+ (Token.Text, ' '),
+ (Token.Text, 'striked'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
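+# '@mention' and '#topic' references are tokenized as Name.Entity.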
+def test_mentions(lexer):
+ fragment = 'note for @me:'
+ tokens = [
+ (Token.Text, 'note'),
+ (Token.Text, ' '),
+ (Token.Text, 'for'),
+ (Token.Text, ' '),
+ (Name.Entity, '@me:'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_topics(lexer):
+ fragment = 'message to #you:'
+ tokens = [
+ (Token.Text, 'message'),
+ (Token.Text, ' '),
+ (Token.Text, 'to'),
+ (Token.Text, ' '),
+ (Name.Entity, '#you:'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
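+# Links: link text is Name.Tag, the URL is Name.Attribute, and reference ids are Name.Label.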
+def test_links(lexer):
+ fragment = '[text](link)'
+ tokens = [
+ (Token.Text, '['),
+ (Token.Name.Tag, 'text'),
+ (Token.Text, ']'),
+ (Token.Text, '('),
+ (Token.Name.Attribute, 'link'),
+ (Token.Text, ')'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '![Image of foo](https://bar.baz)'
+ tokens = [
+ (Token.Text, '!['),
+ (Token.Name.Tag, 'Image of foo'),
+ (Token.Text, ']'),
+ (Token.Text, '('),
+ (Token.Name.Attribute, 'https://bar.baz'),
+ (Token.Text, ')'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_reference_style_links(lexer):
+ fragment = '[an example][id]'
+ tokens = [
+ (Token.Text, '['),
+ (Token.Name.Tag, 'an example'),
+ (Token.Text, ']'),
+ (Token.Text, '['),
+ (Token.Name.Label, 'id'),
+ (Token.Text, ']'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens
+
+ fragment = '[id]: http://example.com'
+ tokens = [
+ (Token.Text, '['),
+ (Token.Name.Label, 'id'),
+ (Token.Text, ']: '),
+ (Token.Name.Attribute, 'http://example.com'),
+ (Token.Text, '\n'),
+ ]
+ assert list(lexer.get_tokens(fragment)) == tokens