Diffstat (limited to 'tests/test_markdown_lexer.py')
-rw-r--r-- | tests/test_markdown_lexer.py | 525
1 files changed, 525 insertions, 0 deletions
diff --git a/tests/test_markdown_lexer.py b/tests/test_markdown_lexer.py
index 9024bf07..524becd7 100644
--- a/tests/test_markdown_lexer.py
+++ b/tests/test_markdown_lexer.py
@@ -8,6 +8,7 @@
 """
 import pytest
 
+from pygments.token import Generic, Token, String, Keyword, Name
 from pygments.lexers.markup import MarkdownLexer
 
 
@@ -34,3 +35,527 @@ def test_code_fence_gsm(lexer):
 
 def test_code_fence_gsm_with_no_lexer(lexer):
     assert_same_text(lexer, r'```invalid-lexer\nfoo\n```\n')
+
+
+def test_invalid_atx_heading(lexer):
+    fragments = (
+        '#',
+        'a #',
+        '*#',
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Heading
+
+
+def test_atx_heading(lexer):
+    fragments = (
+        '#Heading',
+        '# Heading',
+        '# Another heading',
+        '# Another # heading',
+        '# Heading #',
+    )
+
+    for fragment in fragments:
+        tokens = [
+            (Generic.Heading, fragment),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_atx_subheading(lexer):
+    fragments = (
+        '##',
+        'a ##',
+        '*##',
+        '####### too many hashes'
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Subheading
+
+
+def test_atx_subheading(lexer):
+    fragments = (
+        '##Subheading',
+        '## Subheading',
+        '### Subheading',
+        '#### Subheading',
+        '##### Subheading',
+        '###### Subheading',
+        '## Another subheading',
+        '## Another ## subheading',
+        '###### Subheading #',
+        '###### Subheading ######',
+    )
+
+    for fragment in fragments:
+        tokens = [
+            (Generic.Subheading, fragment),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_setext_heading(lexer):
+    fragments = (
+        'Heading\n',
+        'Heading\n_',
+        'Heading\n =====',
+        'Heading\na=====',
+        '=====',
+        '\n=\n',
+        'Heading\n=====Text'
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Heading
+
+
+def test_setext_heading(lexer):
+    fragments = (
+        'Heading\n=',
+        'Heading\n=======',
+        'Heading\n==========',
+    )
+
+    for fragment in fragments:
+        tokens = [
+            (Generic.Heading, fragment.split('\n')[0]),
+            (Token.Text, '\n'),
+            (Generic.Heading, fragment.split('\n')[1]),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_setext_subheading(lexer):
+    fragments = (
+        'Subheading\n',
+        'Subheading\n_',
+        'Subheading\n -----',
+        'Subheading\na-----',
+        '-----',
+        '\n-\n',
+        'Subheading\n-----Text'
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Subheading
+
+
+def test_setext_subheading(lexer):
+    fragments = (
+        'Subheading\n-',
+        'Subheading\n----------',
+        'Subheading\n-----------',
+    )
+
+    for fragment in fragments:
+        tokens = [
+            (Generic.Subheading, fragment.split('\n')[0]),
+            (Token.Text, '\n'),
+            (Generic.Subheading, fragment.split('\n')[1]),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_task_list(lexer):
+    fragment = '- [ ] sample task'
+    tokens = [
+        (Keyword, '- '),
+        (Keyword, '[ ]'),
+        (Token.Text, ' '),
+        (Token.Text, 'sample'),
+        (Token.Text, ' '),
+        (Token.Text, 'task'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '* [ ] sample task'
+    tokens = [
+        (Keyword, '* '),
+        (Keyword, '[ ]'),
+        (Token.Text, ' '),
+        (Token.Text, 'sample'),
+        (Token.Text, ' '),
+        (Token.Text, 'task'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = ' * [ ] sample task'
+    tokens = [
+        (Token.Text, ' '),
+        (Keyword, '* '),
+        (Keyword, '[ ]'),
+        (Token.Text, ' '),
+        (Token.Text, 'sample'),
+        (Token.Text, ' '),
+        (Token.Text, 'task'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_bulleted_list(lexer):
+    fragment = '* foo\n* bar'
+    tokens = [
+        (Keyword, '*'),
+        (Token.Text, ' '),
+        (Token.Text, 'foo'),
+        (Token.Text, '\n'),
+        (Keyword, '*'),
+        (Token.Text, ' '),
+        (Token.Text, 'bar'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '- foo\n- bar'
+    tokens = [
+        (Keyword, '-'),
+        (Token.Text, ' '),
+        (Token.Text, 'foo'),
+        (Token.Text, '\n'),
+        (Keyword, '-'),
+        (Token.Text, ' '),
+        (Token.Text, 'bar'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '* *foo*\n* bar'
+    tokens = [
+        (Keyword, '*'),
+        (Token.Text, ' '),
+        (Generic.Emph, '*foo*'),
+        (Token.Text, '\n'),
+        (Keyword, '*'),
+        (Token.Text, ' '),
+        (Token.Text, 'bar'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_numbered_list(lexer):
+    fragment = '1. foo\n2. bar'
+    tokens = [
+        (Keyword, '1.'),
+        (Token.Text, ' '),
+        (Token.Text, 'foo'),
+        (Token.Text, '\n'),
+        (Keyword, '2.'),
+        (Token.Text, ' '),
+        (Token.Text, 'bar'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_quote(lexer):
+    fragment = '> a\n> quote'
+    tokens = [
+        (Keyword, '> '),
+        (Generic.Emph, 'a\n'),
+        (Keyword, '> '),
+        (Generic.Emph, 'quote\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_code_block(lexer):
+    fragments = (
+        '```code```',
+        'prefix not allowed before ```\ncode block\n```'
+        ' code',
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != String.Backtick
+
+
+def test_code_block_fenced_by_backticks(lexer):
+    fragments = (
+        '```\ncode\n```',
+        '```\nmulti\n`line`\ncode\n```',
+    )
+    for fragment in fragments:
+        tokens = [
+            (String.Backtick, fragment),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_code_block_with_language(lexer):
+    fragments = (
+        '```python\nimport this\n```',
+    )
+    for fragment in fragments:
+        tokens = [
+            (String.Backtick, '```'),
+            (String.Backtick, 'python'),
+            (Token.Text, '\n'),
+            (Token.Keyword.Namespace, 'import'),
+            (Token.Text, ' '),
+            (Token.Name.Namespace, 'this'),
+            (Token.Text, '\n'),
+            (String.Backtick, '```'),
+            (Token.Text, '\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_code_indented_with_spaces(lexer):
+    fragments = (
+        'sample:\n\n    code\n',
+    )
+    for fragment in fragments:
+        tokens = [
+            (Token.Text, 'sample:'),
+            (Token.Text, '\n\n'),
+            (String.Backtick, '    code\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragments = (
+        'sample:\n\n\tcode\n',
+    )
+    for fragment in fragments:
+        tokens = [
+            (Token.Text, 'sample:'),
+            (Token.Text, '\n\n'),
+            (String.Backtick, '\tcode\n'),
+        ]
+        assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_inline_code(lexer):
+    fragment = 'code: `code`'
+    tokens = [
+        (Token.Text, 'code:'),
+        (Token.Text, ' '),
+        (String.Backtick, '`code`'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = ' `**code**`'
+    tokens = [
+        (Token.Text, ' '),
+        (String.Backtick, '`**code**`'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '(`code`)'
+    tokens = [
+        (Token.Text, '('),
+        (String.Backtick, '`code`'),
+        (Token.Text, ')'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_bold(lexer):
+    fragments = (
+        '**no bold__',
+        '__no bold**',
+        '*no bold*',
+        '_no bold_',
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Strong
+
+
+def test_bold_fenced_by_asterisk(lexer):
+    fragment = '**bold**'
+    tokens = [
+        (Generic.Strong, '**bold**'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_bold_fenced_by_underscore(lexer):
+    fragment = '__bold__'
+    tokens = [
+        (Generic.Strong, '__bold__'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_invalid_italics(lexer):
+    fragments = (
+        '*no italics_',
+        '_no italics*',
+        '**no italics**',
+        '__no italics__',
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Emph
+
+
+def test_italics_fenced_by_asterisk(lexer):
+    fragment = '*italics*'
+    tokens = [
+        (Generic.Emph, '*italics*'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_italics_fenced_by_underscore(lexer):
+    fragment = '_italics_'
+    tokens = [
+        (Generic.Emph, '_italics_'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_escape_italics(lexer):
+    fragments = (
+        r'\*no italics\*',
+        r'\_ no italics \_',
+    )
+
+    for fragment in fragments:
+        for token, _ in lexer.get_tokens(fragment):
+            assert token != Generic.Emph
+
+
+def test_italics_no_multiline(lexer):
+    fragment = '*no\nitalics*'
+
+    for token, _ in lexer.get_tokens(fragment):
+        assert token != Generic.Emph
+
+
+def test_italics_and_bold(lexer):
+    fragment = '**bold** and *italics*'
+    tokens = [
+        (Generic.Strong, '**bold**'),
+        (Token.Text, ' '),
+        (Token.Text, 'and'),
+        (Token.Text, ' '),
+        (Generic.Emph, '*italics*'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '*italics* and **bold**'
+    tokens = [
+        (Generic.Emph, '*italics*'),
+        (Token.Text, ' '),
+        (Token.Text, 'and'),
+        (Token.Text, ' '),
+        (Generic.Strong, '**bold**'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_strikethrough(lexer):
+    fragment = '~~striked~~not striked'
+    tokens = [
+        (Generic.Deleted, '~~striked~~'),
+        (Token.Text, 'not'),
+        (Token.Text, ' '),
+        (Token.Text, 'striked'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_mentions(lexer):
+    fragment = 'note for @me:'
+    tokens = [
+        (Token.Text, 'note'),
+        (Token.Text, ' '),
+        (Token.Text, 'for'),
+        (Token.Text, ' '),
+        (Name.Entity, '@me:'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_topics(lexer):
+    fragment = 'message to #you:'
+    tokens = [
+        (Token.Text, 'message'),
+        (Token.Text, ' '),
+        (Token.Text, 'to'),
+        (Token.Text, ' '),
+        (Name.Entity, '#you:'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_links(lexer):
+    fragment = '[text](link)'
+    tokens = [
+        (Token.Text, '['),
+        (Token.Name.Tag, 'text'),
+        (Token.Text, ']'),
+        (Token.Text, '('),
+        (Token.Name.Attribute, 'link'),
+        (Token.Text, ')'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '![Image of foo](https://bar.baz)'
+    tokens = [
+        (Token.Text, '!['),
+        (Token.Name.Tag, 'Image of foo'),
+        (Token.Text, ']'),
+        (Token.Text, '('),
+        (Token.Name.Attribute, 'https://bar.baz'),
+        (Token.Text, ')'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+
+def test_reference_style_links(lexer):
+    fragment = '[an example][id]'
+    tokens = [
+        (Token.Text, '['),
+        (Token.Name.Tag, 'an example'),
+        (Token.Text, ']'),
+        (Token.Text, '['),
+        (Token.Name.Label, 'id'),
+        (Token.Text, ']'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
+
+    fragment = '[id]: http://example.com'
+    tokens = [
+        (Token.Text, '['),
+        (Token.Name.Label, 'id'),
+        (Token.Text, ']: '),
+        (Token.Name.Attribute, 'http://example.com'),
+        (Token.Text, '\n'),
+    ]
+    assert list(lexer.get_tokens(fragment)) == tokens
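
For orientation, every test added above follows the same pattern: it compares the (token_type, value) pairs yielded by MarkdownLexer.get_tokens() against an expected list. A minimal, self-contained sketch of that pattern (not part of the commit, using only the API exercised in the tests above):

    from pygments.lexers.markup import MarkdownLexer
    from pygments.token import Generic, Token

    lexer = MarkdownLexer()

    # get_tokens() yields (token_type, value) pairs; the input is normalized to
    # end with a newline, hence the trailing (Token.Text, '\n') in every test.
    assert list(lexer.get_tokens('# Heading')) == [
        (Generic.Heading, '# Heading'),
        (Token.Text, '\n'),
    ]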