tests/test_syntax.py


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166


import os
import markdown
import codecs
import difflib
import nose

import sys
if sys.version_info[0] == 3:
    from configparser import SafeConfigParser
else:
    from ConfigParser import SafeConfigParser

class MarkdownSyntaxError(Exception):
    """ Raised when generated output does not match the expected output. """


class CustomConfigParser(SafeConfigParser):
    """ Config parser whose `get` converts raw option strings on read.

    * The `extensions` option is returned as a list, split on commas (items
      are not stripped). A blank value gives an empty list.
    * Values spelling a boolean ('yes'/'true'/'on'/'1' and
      'no'/'false'/'off'/'0', case-insensitive) are returned as True/False.
    * Any other value is returned unchanged.
    """

    def get(self, section, option, **kwargs):
        """ Return the converted value of `option` in `section`.

        Extra keyword arguments (e.g. `raw`, `vars`, `fallback`) are handed
        to the base class untouched, so this override stays call-compatible
        with the `get` it replaces.
        """
        value = SafeConfigParser.get(self, section, option, **kwargs)
        # A caller-supplied fallback may not be a string (e.g. None); there
        # is nothing to convert in that case.
        if not hasattr(value, 'lower'):
            return value
        if option == 'extensions':
            if value.strip():
                return value.split(',')
            return []
        lowered = value.lower()
        if lowered in ('yes', 'true', 'on', '1'):
            return True
        if lowered in ('no', 'false', 'off', '0'):
            return False
        return value
try:
    import tidy
except ImportError:
    tidy = None


# Absolute path of the directory containing this file; used as the root that
# is walked for tests and as the default base for relative paths.
test_dir = os.path.abspath(os.path.dirname(__file__))

def relpath(path, start=test_dir):
    """ Reimplement relpath for python 2.3-2.5 from 2.6.

    Return `path` expressed relative to `start` (default: `test_dir`). Both
    are made absolute before being compared. When they resolve to the same
    location, `test_dir` itself is returned.

    Raises ValueError if `path` is empty.
    """
    if not path:
        raise ValueError('no path specified')
    start_parts = os.path.abspath(start).split(os.path.sep)
    path_parts = os.path.abspath(path).split(os.path.sep)
    # commonprefix is given two lists here, so the shared prefix is counted
    # in whole path components rather than in characters.
    shared = len(os.path.commonprefix([start_parts, path_parts]))
    # Climb out of the unshared tail of `start`, then descend into `path`.
    climb = [os.path.pardir] * (len(start_parts) - shared)
    rel_parts = climb + path_parts[shared:]
    if not rel_parts:
        return test_dir
    return os.path.join(*rel_parts)

def get_config(dir_name):
    """ Return a parser for `dir_name`, loaded from its `test.cfg` file.

    The parser is seeded with the framework defaults before the file is
    read: no normalizing, no skipping, `.txt` input and `.html` output.
    """
    defaults = {
        'normalize': '0',
        'skip': '0',
        'input_ext': '.txt',
        'output_ext': '.html',
    }
    parser = CustomConfigParser(defaults)
    cfg_path = os.path.join(dir_name, 'test.cfg')
    parser.read(cfg_path)
    return parser

def get_section(file, config):
    """ Return the name of the config section that applies to `file`.

    That is the base name of `file` when `config` has a section of that
    name, and 'DEFAULT' otherwise.
    """
    name = os.path.basename(file)
    return name if config.has_section(name) else 'DEFAULT'

def get_args(file, config):
    """ Collect the keyword arguments to pass to markdown for `file`.

    Every option of the file's config section is included except the ones
    that only steer the testing framework.
    """
    framework_only = ['normalize', 'skip', 'input_ext', 'output_ext']
    section = get_section(file, config)
    args = {}
    for name, _ in config.items(section):
        if name in framework_only:
            continue
        # Read the value through config.get rather than using the one
        # yielded by items().
        args[name] = config.get(section, name)
    return args

def normalize(text):
    """ Normalize whitespace for a string of html using tidy.

    `text` is encoded to UTF-8 (characters that cannot be encoded become XML
    character references), parsed by tidy with the options below, and the
    resulting document is returned as a string.

    Callers must make sure the module-level `tidy` is not None first.
    """
    document = tidy.parseString(text.encode('utf-8', 'xmlcharrefreplace'),
                                drop_empty_paras=0,
                                fix_backslash=0,
                                fix_bad_comments=0,
                                fix_uri=0,
                                join_styles=0,
                                lower_literals=0,
                                merge_divs=0,
                                output_xhtml=1,
                                quote_ampersand=0,
                                show_body_only=1,
                                char_encoding='utf8',
                                newline='LF')
    result = str(document)
    # Python 2 byte strings have decode() and get their escape sequences
    # resolved as before. Python 3 text strings have no decode() method (and
    # no 'string-escape' codec), so they are returned as they are instead of
    # raising AttributeError.
    # NOTE(review): confirm what str() of a tidy document yields on Python 3.
    if hasattr(result, 'decode'):
        result = result.decode('string-escape')
    return result

class CheckSyntax(object):
    """ Callable that checks one input file against its expected output. """

    def __init__(self, description=None):
        # The attribute is only created when a description was supplied.
        if description:
            self.description = 'TestSyntax: "%s"' % description

    def __call__(self, file, config):
        """ Run markdown on the input for `file` and compare the results.

        `file` is the path without extension; the input and output
        extensions come from `config`. Raises a skip when the config asks
        for it, or when normalizing is requested but tidy is unavailable,
        and MarkdownSyntaxError (carrying a unified diff) on a mismatch.
        """
        section = get_section(file, config)
        if config.get(section, 'skip'):
            raise nose.plugins.skip.SkipTest('Test skipped per config.')

        source_path = file + config.get(section, 'input_ext')
        with codecs.open(source_path, encoding="utf-8") as handle:
            source = handle.read()

        expected_path = file + config.get(section, 'output_ext')
        with codecs.open(expected_path, encoding="utf-8") as handle:
            # Git may have rewritten line endings on windows; use "\n" only.
            expected = handle.read().replace("\r\n", "\n")

        actual = markdown.markdown(source, **get_args(file, config))

        if config.get(section, 'normalize'):
            if not tidy:
                # Normalizing was requested but cannot be done here.
                raise nose.plugins.skip.SkipTest('Test skipped. Tidy not available in system.')
            # Let Tidy even out whitespace on both sides before comparing.
            expected = normalize(expected)
            actual = normalize(actual)

        delta = list(difflib.unified_diff(expected.splitlines(True),
                                          actual.splitlines(True),
                                          expected_path,
                                          'actual_output.html',
                                          n=3))
        if delta:
            message = ('Output from "%s" failed to match expected '
                       'output.\n\n%s' % (source_path, ''.join(delta)))
            raise MarkdownSyntaxError(message)

def TestSyntax():
    """ Yield a (check, path, config) triple for each input file found.

    Walks `test_dir`, loads the config of every directory and yields one
    CheckSyntax callable per file whose extension matches the configured
    `input_ext`. `path` is the file path without its extension.
    """
    for dir_name, sub_dirs, files in os.walk(test_dir):
        # Get dir specific config settings.
        config = get_config(dir_name)
        # Loop through files and generate tests.
        for file in files:
            root, ext = os.path.splitext(file)
            # Look the section up by the extension-less name, the same key
            # the check itself uses, so a per-file `input_ext` is honoured.
            if ext == config.get(get_section(root, config), 'input_ext'):
                path = os.path.join(dir_name, root)
                check_syntax = CheckSyntax(description=relpath(path, test_dir))
                yield check_syntax, path, config

def generate(file, config):
    """ Write the expected output file for the given input, if needed.

    `file` is the path without extension. Nothing is written when the
    config marks the file as skipped, or when the output file exists and is
    not older than the input file.
    """
    section = get_section(file, config)
    if config.get(section, 'skip'):
        print('Skipping:', file)
        return None

    source_path = file + config.get(section, 'input_ext')
    target_path = file + config.get(section, 'output_ext')

    # Out of date means: no output file yet, or one older than its input.
    stale = (not os.path.isfile(target_path) or
             os.path.getmtime(target_path) < os.path.getmtime(source_path))
    if not stale:
        print('Already up-to-date:', file)
        return None

    print('Generating:', file)
    markdown.markdownFromFile(input=source_path, output=target_path,
                              encoding='utf-8', **get_args(file, config))

def generate_all():
    """ Generate expected output for all outdated tests.

    Walks `test_dir`, loads the config of every directory and calls
    `generate` for each file whose extension matches the configured
    `input_ext`.
    """
    for dir_name, sub_dirs, files in os.walk(test_dir):
        # Get dir specific config settings.
        config = get_config(dir_name)
        # Loop through files and generate tests.
        for file in files:
            root, ext = os.path.splitext(file)
            # Look the section up by the extension-less name, the same key
            # `generate` uses, so a per-file `input_ext` is honoured.
            if ext == config.get(get_section(root, config), 'input_ext'):
                generate(os.path.join(dir_name, root), config)