Diffstat (limited to 'tests/test_examplefiles.py')
| -rw-r--r-- | tests/test_examplefiles.py | 138 | 
1 files changed, 138 insertions, 0 deletions
diff --git a/tests/test_examplefiles.py b/tests/test_examplefiles.py
new file mode 100644
index 00000000..28037a55
--- /dev/null
+++ b/tests/test_examplefiles.py
@@ -0,0 +1,138 @@
+# -*- coding: utf-8 -*-
+"""
+    Pygments tests with example files
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+    :copyright: Copyright 2006-2015 by the Pygments team, see AUTHORS.
+    :license: BSD, see LICENSE for details.
+"""
+
+from __future__ import print_function
+
+import os
+import pprint
+import difflib
+import pickle
+
+from pygments.lexers import get_lexer_for_filename, get_lexer_by_name
+from pygments.token import Error
+from pygments.util import ClassNotFound
+
+import support
+
+STORE_OUTPUT = False
+
+STATS = {}
+
+TESTDIR = os.path.dirname(__file__)
+
+# Jython generates a StackOverflowError for repetitions of the form (a|b)+,
+# which are commonly used in string patterns, when matching more than about 1000
+# chars.  These tests do not complete.  See http://bugs.jython.org/issue1965
+BAD_FILES_FOR_JYTHON = ('Object.st', 'all.nit', 'genclass.clj',
+                        'ragel-cpp_rlscan')
+
+def test_example_files():
+    global STATS
+    STATS = {}
+    outdir = os.path.join(TESTDIR, 'examplefiles', 'output')
+    if STORE_OUTPUT and not os.path.isdir(outdir):
+        os.makedirs(outdir)
+    for fn in os.listdir(os.path.join(TESTDIR, 'examplefiles')):
+        if fn.startswith('.') or fn.endswith('#'):
+            continue
+
+        absfn = os.path.join(TESTDIR, 'examplefiles', fn)
+        if not os.path.isfile(absfn):
+            continue
+
+        extension = os.getenv('TEST_EXT')
+        if extension and not absfn.endswith(extension):
+            continue
+
+        print(absfn)
+        with open(absfn, 'rb') as f:
+            code = f.read()
+        try:
+            code = code.decode('utf-8')
+        except UnicodeError:
+            code = code.decode('latin1')
+
+        lx = None
+        if '_' in fn:
+            try:
+                lx = get_lexer_by_name(fn.split('_')[0])
+            except ClassNotFound:
+                pass
+        if lx is None:
+            try:
+                lx = get_lexer_for_filename(absfn, code=code)
+            except ClassNotFound:
+                raise AssertionError('file %r has no registered extension, '
+                                     'nor is of the form <lexer>_filename '
+                                     'for overriding, thus no lexer found.'
+                                     % fn)
+        yield check_lexer, lx, fn
+
+    N = 7
+    stats = list(STATS.items())
+    stats.sort(key=lambda x: x[1][1])
+    print('\nExample files that took longest absolute time:')
+    for fn, t in stats[-N:]:
+        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
+    print()
+    stats.sort(key=lambda x: x[1][2])
+    print('\nExample files that took longest relative time:')
+    for fn, t in stats[-N:]:
+        print('%-30s  %6d chars  %8.2f ms  %7.3f ms/char' % ((fn,) + t))
+
+
+def check_lexer(lx, fn):
+    if os.name == 'java' and fn in BAD_FILES_FOR_JYTHON:
+        raise support.SkipTest('%s is a known bad file on Jython' % fn)
+    absfn = os.path.join(TESTDIR, 'examplefiles', fn)
+    with open(absfn, 'rb') as fp:
+        text = fp.read()
+    text = text.replace(b'\r\n', b'\n')
+    text = text.strip(b'\n') + b'\n'
+    try:
+        text = text.decode('utf-8')
+        if text.startswith(u'\ufeff'):
+            text = text[len(u'\ufeff'):]
+    except UnicodeError:
+        text = text.decode('latin1')
+    ntext = []
+    tokens = []
+    import time
+    t1 = time.time()
+    for type, val in lx.get_tokens(text):
+        ntext.append(val)
+        assert type != Error, \
+            'lexer %s generated error token for %s: %r at position %d' % \
+            (lx, absfn, val, len(u''.join(ntext)))
+        tokens.append((type, val))
+    t2 = time.time()
+    STATS[os.path.basename(absfn)] = (len(text),
+                                      1000 * (t2 - t1), 1000 * (t2 - t1) / len(text))
+    if u''.join(ntext) != text:
+        print('\n'.join(difflib.unified_diff(u''.join(ntext).splitlines(),
+                                             text.splitlines())))
+        raise AssertionError('round trip failed for ' + absfn)
+
+    # check output against previous run if enabled
+    if STORE_OUTPUT:
+        # no previous output -- store it
+        outfn = os.path.join(TESTDIR, 'examplefiles', 'output', fn)
+        if not os.path.isfile(outfn):
+            with open(outfn, 'wb') as fp:
+                pickle.dump(tokens, fp)
+            return
+        # otherwise load it and compare
+        with open(outfn, 'rb') as fp:
+            stored_tokens = pickle.load(fp)
+        if stored_tokens != tokens:
+            f1 = pprint.pformat(stored_tokens)
+            f2 = pprint.pformat(tokens)
+            print('\n'.join(difflib.unified_diff(f1.splitlines(),
+                                                 f2.splitlines())))
+            assert False, absfn
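The new module uses nose-style generator tests: test_example_files() yields a (check_lexer, lx, fn) tuple per example file, the test runner calls each one, and the TEST_EXT environment variable narrows the run to files with a matching extension. As a rough sketch only (not part of this commit), the generator can also be driven by hand; this assumes the working directory is tests/ so that the module and its local support helper import cleanly, and '.lua' below is just an arbitrary example filter.

    # Minimal sketch: drive the generator test outside a test runner.
    # Assumes cwd is tests/; '.lua' is an arbitrary example value for TEST_EXT.
    import os

    os.environ['TEST_EXT'] = '.lua'   # same variable test_example_files() reads

    import test_examplefiles

    for check, lx, fn in test_examplefiles.test_example_files():
        check(lx, fn)                 # what nose does with each yielded tuple
        print('lexed %s with %s' % (fn, lx.name))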
