diff options
Diffstat (limited to 'Lib/test/test_tokenize.py')
| -rw-r--r-- | Lib/test/test_tokenize.py | 681 | 
1 files changed, 493 insertions, 188 deletions
| diff --git a/Lib/test/test_tokenize.py b/Lib/test/test_tokenize.py index e59d9c672c..2ed723b87e 100644 --- a/Lib/test/test_tokenize.py +++ b/Lib/test/test_tokenize.py @@ -1,126 +1,501 @@ -"""Tests for the tokenize module. +doctests = """ +Tests for the tokenize module. -The tests were originally written in the old Python style, where the -test output was compared to a golden file.  This docstring represents -the first steps towards rewriting the entire test as a doctest. +    >>> import glob, random, sys -The tests can be really simple.  Given a small fragment of source -code, print out a table with the tokens.  The ENDMARK is omitted for +The tests can be really simple. Given a small fragment of source +code, print out a table with thokens. The ENDMARK is omitted for  brevity. ->>> dump_tokens("1 + 1") -NUMBER      '1'           (1, 0) (1, 1) -OP          '+'           (1, 2) (1, 3) -NUMBER      '1'           (1, 4) (1, 5) - -A comment generates a token here, unlike in the parser module.  The -comment token is followed by an NL or a NEWLINE token, depending on -whether the line contains the completion of a statement. - ->>> dump_tokens("if False:\\n" -...             "    # NL\\n" -...             "    a    = False # NEWLINE\\n") -NAME        'if'          (1, 0) (1, 2) -NAME        'False'       (1, 3) (1, 8) -OP          ':'           (1, 8) (1, 9) -NEWLINE     '\\n'          (1, 9) (1, 10) -COMMENT     '# NL'        (2, 4) (2, 8) -NL          '\\n'          (2, 8) (2, 9) -INDENT      '    '        (3, 0) (3, 4) -NAME        'a'           (3, 4) (3, 5) -OP          '='           (3, 9) (3, 10) -NAME        'False'       (3, 11) (3, 16) -COMMENT     '# NEWLINE'   (3, 17) (3, 26) -NEWLINE     '\\n'          (3, 26) (3, 27) -DEDENT      ''            (4, 0) (4, 0) - -' # Emacs hint - -There will be a bunch more tests of specific source patterns. - -The tokenize module also defines an untokenize function that should -regenerate the original program text from the tokens. - -There are some standard formatting practices that are easy to get right. - ->>> roundtrip("if x == 1:\\n" -...           "    print(x)\\n") -if x == 1: -    print(x) +    >>> dump_tokens("1 + 1") +    NUMBER     '1'           (1, 0) (1, 1) +    OP         '+'           (1, 2) (1, 3) +    NUMBER     '1'           (1, 4) (1, 5) + +    >>> dump_tokens("if False:\\n" +    ...             "    # NL\\n" +    ...             "    True = False # NEWLINE\\n") +    NAME       'if'          (1, 0) (1, 2) +    NAME       'False'       (1, 3) (1, 8) +    OP         ':'           (1, 8) (1, 9) +    NEWLINE    '\\n'          (1, 9) (1, 10) +    COMMENT    '# NL'        (2, 4) (2, 8) +    NL         '\\n'          (2, 8) (2, 9) +    INDENT     '    '        (3, 0) (3, 4) +    NAME       'True'        (3, 4) (3, 8) +    OP         '='           (3, 9) (3, 10) +    NAME       'False'       (3, 11) (3, 16) +    COMMENT    '# NEWLINE'   (3, 17) (3, 26) +    NEWLINE    '\\n'          (3, 26) (3, 27) +    DEDENT     ''            (4, 0) (4, 0) + +    >>> indent_error_file = \""" +    ... def k(x): +    ...     x += 2 +    ...   x += 5 +    ... \""" +    >>> for tok in generate_tokens(StringIO(indent_error_file).readline): pass +    Traceback (most recent call last): +        ... +    IndentationError: unindent does not match any outer indentation level + +Test roundtrip for `untokenize`. `f` is an open file or a string. The source +code in f is tokenized, converted back to source code via tokenize.untokenize(), +and tokenized again from the latter. The test fails if the second tokenization +doesn't match the first. + +    >>> def roundtrip(f): +    ...     if isinstance(f, str): f = StringIO(f) +    ...     token_list = list(generate_tokens(f.readline)) +    ...     f.close() +    ...     tokens1 = [tok[:2] for tok in token_list] +    ...     new_text = untokenize(tokens1) +    ...     readline = iter(new_text.splitlines(1)).__next__ +    ...     tokens2 = [tok[:2] for tok in generate_tokens(readline)] +    ...     return tokens1 == tokens2 +    ... + +There are some standard formattig practises that are easy to get right. + +    >>> roundtrip("if x == 1:\\n" +    ...           "    print(x)\\n") +    True + +    >>> roundtrip("# This is a comment\\n# This also") +    True  Some people use different formatting conventions, which makes -untokenize a little trickier.  Note that this test involves trailing -whitespace after the colon.  Note that we use hex escapes to make the -two trailing blanks apparent in the expected output. - ->>> roundtrip("if   x  ==  1  :  \\n" -...           "  print(x)\\n") -if   x  ==  1  :\x20\x20 -  print(x) - -Comments need to go in the right place. - ->>> roundtrip("if x == 1:\\n" -...           "    # A comment by itself.\\n" -...           "    print(x)  # Comment here, too.\\n" -...           "    # Another comment.\\n" -...           "after_if = True\\n") -if x == 1: -    # A comment by itself. -    print(x)  # Comment here, too. -    # Another comment. -after_if = True - ->>> roundtrip("if (x  # The comments need to go in the right place\\n" -...           "    == 1):\\n" -...           "    print('x == 1')\\n") -if (x  # The comments need to go in the right place -    == 1): -    print('x == 1') - +untokenize a little trickier. Note that this test involves trailing +whitespace after the colon. Note that we use hex escapes to make the +two trailing blanks apperant in the expected output. + +    >>> roundtrip("if x == 1 : \\n" +    ...           "  print(x)\\n") +    True + +    >>> f = test_support.findfile("tokenize_tests.txt") +    >>> roundtrip(open(f)) +    True + +    >>> roundtrip("if x == 1:\\n" +    ...           "    # A comment by itself.\\n" +    ...           "    print(x) # Comment here, too.\\n" +    ...           "    # Another comment.\\n" +    ...           "after_if = True\\n") +    True + +    >>> roundtrip("if (x # The comments need to go in the right place\\n" +    ...           "    == 1):\\n" +    ...           "    print('x==1')\\n") +    True + +    >>> roundtrip("class Test: # A comment here\\n" +    ...           "  # A comment with weird indent\\n" +    ...           "  after_com = 5\\n" +    ...           "  def x(m): return m*5 # a one liner\\n" +    ...           "  def y(m): # A whitespace after the colon\\n" +    ...           "     return y*4 # 3-space indent\\n") +    True + +Some error-handling code + +    >>> roundtrip("try: import somemodule\\n" +    ...           "except ImportError: # comment\\n" +    ...           "    print 'Can not import' # comment2\\n" +    ...           "else:   print 'Loaded'\\n") +    True + +Balancing contunuation + +    >>> roundtrip("a = (3,4, \\n" +    ...           "5,6)\\n" +    ...           "y = [3, 4,\\n" +    ...           "5]\\n" +    ...           "z = {'a': 5,\\n" +    ...           "'b':15, 'c':True}\\n" +    ...           "x = len(y) + 5 - a[\\n" +    ...           "3] - a[2]\\n" +    ...           "+ len(z) - z[\\n" +    ...           "'b']\\n") +    True + +Ordinary integers and binary operators + +    >>> dump_tokens("0xff <= 255") +    NUMBER     '0xff'        (1, 0) (1, 4) +    OP         '<='          (1, 5) (1, 7) +    NUMBER     '255'         (1, 8) (1, 11) +    >>> dump_tokens("01234567 > ~0x15") +    NUMBER     '01234567'    (1, 0) (1, 8) +    OP         '>'           (1, 9) (1, 10) +    OP         '~'           (1, 11) (1, 12) +    NUMBER     '0x15'        (1, 12) (1, 16) +    >>> dump_tokens("2134568 != 01231515") +    NUMBER     '2134568'     (1, 0) (1, 7) +    OP         '!='          (1, 8) (1, 10) +    NUMBER     '01231515'    (1, 11) (1, 19) +    >>> dump_tokens("(-124561-1) & 0200000000") +    OP         '('           (1, 0) (1, 1) +    OP         '-'           (1, 1) (1, 2) +    NUMBER     '124561'      (1, 2) (1, 8) +    OP         '-'           (1, 8) (1, 9) +    NUMBER     '1'           (1, 9) (1, 10) +    OP         ')'           (1, 10) (1, 11) +    OP         '&'           (1, 12) (1, 13) +    NUMBER     '0200000000'  (1, 14) (1, 24) +    >>> dump_tokens("0xdeadbeef != -1") +    NUMBER     '0xdeadbeef'  (1, 0) (1, 10) +    OP         '!='          (1, 11) (1, 13) +    OP         '-'           (1, 14) (1, 15) +    NUMBER     '1'           (1, 15) (1, 16) +    >>> dump_tokens("0xdeadc0de & 012345") +    NUMBER     '0xdeadc0de'  (1, 0) (1, 10) +    OP         '&'           (1, 11) (1, 12) +    NUMBER     '012345'      (1, 13) (1, 19) +    >>> dump_tokens("0xFF & 0x15 | 1234") +    NUMBER     '0xFF'        (1, 0) (1, 4) +    OP         '&'           (1, 5) (1, 6) +    NUMBER     '0x15'        (1, 7) (1, 11) +    OP         '|'           (1, 12) (1, 13) +    NUMBER     '1234'        (1, 14) (1, 18) + +Long integers + +    >>> dump_tokens("x = 0L") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '0L'          (1, 4) (1, 6) +    >>> dump_tokens("x = 0xfffffffffff") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '0xffffffffff (1, 4) (1, 17) +    >>> dump_tokens("x = 123141242151251616110l") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '123141242151 (1, 4) (1, 26) +    >>> dump_tokens("x = -15921590215012591L") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    OP         '-'           (1, 4) (1, 5) +    NUMBER     '159215902150 (1, 5) (1, 23) + +Floating point numbers + +    >>> dump_tokens("x = 3.14159") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '3.14159'     (1, 4) (1, 11) +    >>> dump_tokens("x = 314159.") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '314159.'     (1, 4) (1, 11) +    >>> dump_tokens("x = .314159") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '.314159'     (1, 4) (1, 11) +    >>> dump_tokens("x = 3e14159") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '3e14159'     (1, 4) (1, 11) +    >>> dump_tokens("x = 3E123") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '3E123'       (1, 4) (1, 9) +    >>> dump_tokens("x+y = 3e-1230") +    NAME       'x'           (1, 0) (1, 1) +    OP         '+'           (1, 1) (1, 2) +    NAME       'y'           (1, 2) (1, 3) +    OP         '='           (1, 4) (1, 5) +    NUMBER     '3e-1230'     (1, 6) (1, 13) +    >>> dump_tokens("x = 3.14e159") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '3.14e159'    (1, 4) (1, 12) + +String literals + +    >>> dump_tokens("x = ''; y = \\\"\\\"") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     "''"          (1, 4) (1, 6) +    OP         ';'           (1, 6) (1, 7) +    NAME       'y'           (1, 8) (1, 9) +    OP         '='           (1, 10) (1, 11) +    STRING     '""'          (1, 12) (1, 14) +    >>> dump_tokens("x = '\\\"'; y = \\\"'\\\"") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     '\\'"\\''       (1, 4) (1, 7) +    OP         ';'           (1, 7) (1, 8) +    NAME       'y'           (1, 9) (1, 10) +    OP         '='           (1, 11) (1, 12) +    STRING     '"\\'"'        (1, 13) (1, 16) +    >>> dump_tokens("x = \\\"doesn't \\\"shrink\\\", does it\\\"") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     '"doesn\\'t "' (1, 4) (1, 14) +    NAME       'shrink'      (1, 14) (1, 20) +    STRING     '", does it"' (1, 20) (1, 31) +    >>> dump_tokens("x = u'abc' + U'ABC'") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     "u'abc'"      (1, 4) (1, 10) +    OP         '+'           (1, 11) (1, 12) +    STRING     "U'ABC'"      (1, 13) (1, 19) +    >>> dump_tokens('y = "ABC" + "ABC"') +    NAME       'y'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     '"ABC"'      (1, 4) (1, 10) +    OP         '+'           (1, 11) (1, 12) +    STRING     '"ABC"'      (1, 13) (1, 19) +    >>> dump_tokens("x = r'abc' + r'ABC' + R'ABC' + R'ABC'") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     "r'abc'"     (1, 4) (1, 11) +    OP         '+'           (1, 12) (1, 13) +    STRING     "r'ABC'"     (1, 14) (1, 21) +    OP         '+'           (1, 22) (1, 23) +    STRING     "R'ABC'"     (1, 24) (1, 31) +    OP         '+'           (1, 32) (1, 33) +    STRING     "R'ABC'"     (1, 34) (1, 41) +    >>> dump_tokens('y = r"abc" + r"ABC" + R"ABC" + R"ABC"') +    NAME       'y'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    STRING     'r"abc"'     (1, 4) (1, 11) +    OP         '+'           (1, 12) (1, 13) +    STRING     'r"ABC"'     (1, 14) (1, 21) +    OP         '+'           (1, 22) (1, 23) +    STRING     'R"ABC"'     (1, 24) (1, 31) +    OP         '+'           (1, 32) (1, 33) +    STRING     'R"ABC"'     (1, 34) (1, 41) + +Operators + +    >>> dump_tokens("def d22(a, b, c=2, d=2, *k): pass") +    NAME       'def'         (1, 0) (1, 3) +    NAME       'd22'         (1, 4) (1, 7) +    OP         '('           (1, 7) (1, 8) +    NAME       'a'           (1, 8) (1, 9) +    OP         ','           (1, 9) (1, 10) +    NAME       'b'           (1, 11) (1, 12) +    OP         ','           (1, 12) (1, 13) +    NAME       'c'           (1, 14) (1, 15) +    OP         '='           (1, 15) (1, 16) +    NUMBER     '2'           (1, 16) (1, 17) +    OP         ','           (1, 17) (1, 18) +    NAME       'd'           (1, 19) (1, 20) +    OP         '='           (1, 20) (1, 21) +    NUMBER     '2'           (1, 21) (1, 22) +    OP         ','           (1, 22) (1, 23) +    OP         '*'           (1, 24) (1, 25) +    NAME       'k'           (1, 25) (1, 26) +    OP         ')'           (1, 26) (1, 27) +    OP         ':'           (1, 27) (1, 28) +    NAME       'pass'        (1, 29) (1, 33) +    >>> dump_tokens("def d01v_(a=1, *k, **w): pass") +    NAME       'def'         (1, 0) (1, 3) +    NAME       'd01v_'       (1, 4) (1, 9) +    OP         '('           (1, 9) (1, 10) +    NAME       'a'           (1, 10) (1, 11) +    OP         '='           (1, 11) (1, 12) +    NUMBER     '1'           (1, 12) (1, 13) +    OP         ','           (1, 13) (1, 14) +    OP         '*'           (1, 15) (1, 16) +    NAME       'k'           (1, 16) (1, 17) +    OP         ','           (1, 17) (1, 18) +    OP         '**'          (1, 19) (1, 21) +    NAME       'w'           (1, 21) (1, 22) +    OP         ')'           (1, 22) (1, 23) +    OP         ':'           (1, 23) (1, 24) +    NAME       'pass'        (1, 25) (1, 29) + +Comparison + +    >>> dump_tokens("if 1 < 1 > 1 == 1 >= 5 <= 0x15 <= 0x12 != " + +    ...             "1 and 5 in 1 not in 1 is 1 or 5 is not 1: pass") +    NAME       'if'          (1, 0) (1, 2) +    NUMBER     '1'           (1, 3) (1, 4) +    OP         '<'           (1, 5) (1, 6) +    NUMBER     '1'           (1, 7) (1, 8) +    OP         '>'           (1, 9) (1, 10) +    NUMBER     '1'           (1, 11) (1, 12) +    OP         '=='          (1, 13) (1, 15) +    NUMBER     '1'           (1, 16) (1, 17) +    OP         '>='          (1, 18) (1, 20) +    NUMBER     '5'           (1, 21) (1, 22) +    OP         '<='          (1, 23) (1, 25) +    NUMBER     '0x15'        (1, 26) (1, 30) +    OP         '<='          (1, 31) (1, 33) +    NUMBER     '0x12'        (1, 34) (1, 38) +    OP         '!='          (1, 39) (1, 41) +    NUMBER     '1'           (1, 42) (1, 43) +    NAME       'and'         (1, 44) (1, 47) +    NUMBER     '5'           (1, 48) (1, 49) +    NAME       'in'          (1, 50) (1, 52) +    NUMBER     '1'           (1, 53) (1, 54) +    NAME       'not'         (1, 55) (1, 58) +    NAME       'in'          (1, 59) (1, 61) +    NUMBER     '1'           (1, 62) (1, 63) +    NAME       'is'          (1, 64) (1, 66) +    NUMBER     '1'           (1, 67) (1, 68) +    NAME       'or'          (1, 69) (1, 71) +    NUMBER     '5'           (1, 72) (1, 73) +    NAME       'is'          (1, 74) (1, 76) +    NAME       'not'         (1, 77) (1, 80) +    NUMBER     '1'           (1, 81) (1, 82) +    OP         ':'           (1, 82) (1, 83) +    NAME       'pass'        (1, 84) (1, 88) + +Shift + +    >>> dump_tokens("x = 1 << 1 >> 5") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '1'           (1, 4) (1, 5) +    OP         '<<'          (1, 6) (1, 8) +    NUMBER     '1'           (1, 9) (1, 10) +    OP         '>>'          (1, 11) (1, 13) +    NUMBER     '5'           (1, 14) (1, 15) + +Additive + +    >>> dump_tokens("x = 1 - y + 15 - 01 + 0x124 + z + a[5]") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '1'           (1, 4) (1, 5) +    OP         '-'           (1, 6) (1, 7) +    NAME       'y'           (1, 8) (1, 9) +    OP         '+'           (1, 10) (1, 11) +    NUMBER     '15'          (1, 12) (1, 14) +    OP         '-'           (1, 15) (1, 16) +    NUMBER     '01'          (1, 17) (1, 19) +    OP         '+'           (1, 20) (1, 21) +    NUMBER     '0x124'       (1, 22) (1, 27) +    OP         '+'           (1, 28) (1, 29) +    NAME       'z'           (1, 30) (1, 31) +    OP         '+'           (1, 32) (1, 33) +    NAME       'a'           (1, 34) (1, 35) +    OP         '['           (1, 35) (1, 36) +    NUMBER     '5'           (1, 36) (1, 37) +    OP         ']'           (1, 37) (1, 38) + +Multiplicative + +    >>> dump_tokens("x = 1//1*1/5*12%0x12") +    NAME       'x'           (1, 0) (1, 1) +    OP         '='           (1, 2) (1, 3) +    NUMBER     '1'           (1, 4) (1, 5) +    OP         '//'          (1, 5) (1, 7) +    NUMBER     '1'           (1, 7) (1, 8) +    OP         '*'           (1, 8) (1, 9) +    NUMBER     '1'           (1, 9) (1, 10) +    OP         '/'           (1, 10) (1, 11) +    NUMBER     '5'           (1, 11) (1, 12) +    OP         '*'           (1, 12) (1, 13) +    NUMBER     '12'          (1, 13) (1, 15) +    OP         '%'           (1, 15) (1, 16) +    NUMBER     '0x12'        (1, 16) (1, 20) + +Unary + +    >>> dump_tokens("~1 ^ 1 & 1 |1 ^ -1") +    OP         '~'           (1, 0) (1, 1) +    NUMBER     '1'           (1, 1) (1, 2) +    OP         '^'           (1, 3) (1, 4) +    NUMBER     '1'           (1, 5) (1, 6) +    OP         '&'           (1, 7) (1, 8) +    NUMBER     '1'           (1, 9) (1, 10) +    OP         '|'           (1, 11) (1, 12) +    NUMBER     '1'           (1, 12) (1, 13) +    OP         '^'           (1, 14) (1, 15) +    OP         '-'           (1, 16) (1, 17) +    NUMBER     '1'           (1, 17) (1, 18) +    >>> dump_tokens("-1*1/1+1*1//1 - ---1**1") +    OP         '-'           (1, 0) (1, 1) +    NUMBER     '1'           (1, 1) (1, 2) +    OP         '*'           (1, 2) (1, 3) +    NUMBER     '1'           (1, 3) (1, 4) +    OP         '/'           (1, 4) (1, 5) +    NUMBER     '1'           (1, 5) (1, 6) +    OP         '+'           (1, 6) (1, 7) +    NUMBER     '1'           (1, 7) (1, 8) +    OP         '*'           (1, 8) (1, 9) +    NUMBER     '1'           (1, 9) (1, 10) +    OP         '//'          (1, 10) (1, 12) +    NUMBER     '1'           (1, 12) (1, 13) +    OP         '-'           (1, 14) (1, 15) +    OP         '-'           (1, 16) (1, 17) +    OP         '-'           (1, 17) (1, 18) +    OP         '-'           (1, 18) (1, 19) +    NUMBER     '1'           (1, 19) (1, 20) +    OP         '**'          (1, 20) (1, 22) +    NUMBER     '1'           (1, 22) (1, 23) + +Selector + +    >>> dump_tokens("import sys, time\\nx = sys.modules['time'].time()") +    NAME       'import'      (1, 0) (1, 6) +    NAME       'sys'         (1, 7) (1, 10) +    OP         ','           (1, 10) (1, 11) +    NAME       'time'        (1, 12) (1, 16) +    NEWLINE    '\\n'          (1, 16) (1, 17) +    NAME       'x'           (2, 0) (2, 1) +    OP         '='           (2, 2) (2, 3) +    NAME       'sys'         (2, 4) (2, 7) +    OP         '.'           (2, 7) (2, 8) +    NAME       'modules'     (2, 8) (2, 15) +    OP         '['           (2, 15) (2, 16) +    STRING     "'time'"      (2, 16) (2, 22) +    OP         ']'           (2, 22) (2, 23) +    OP         '.'           (2, 23) (2, 24) +    NAME       'time'        (2, 24) (2, 28) +    OP         '('           (2, 28) (2, 29) +    OP         ')'           (2, 29) (2, 30) + +Methods + +    >>> dump_tokens("@staticmethod\\ndef foo(x,y): pass") +    OP         '@'           (1, 0) (1, 1) +    NAME       'staticmethod (1, 1) (1, 13) +    NEWLINE    '\\n'          (1, 13) (1, 14) +    NAME       'def'         (2, 0) (2, 3) +    NAME       'foo'         (2, 4) (2, 7) +    OP         '('           (2, 7) (2, 8) +    NAME       'x'           (2, 8) (2, 9) +    OP         ','           (2, 9) (2, 10) +    NAME       'y'           (2, 10) (2, 11) +    OP         ')'           (2, 11) (2, 12) +    OP         ':'           (2, 12) (2, 13) +    NAME       'pass'        (2, 14) (2, 18) + +Backslash means line continuation, except for comments + +    >>> roundtrip("x=1+\\\\n" +    ...           "1\\n" +    ...           "# This is a comment\\\\n" +    ...           "# This also\\n") +    True +    >>> roundtrip("# Comment \\\\nx = 0") +    True + +    >>> +    >>> tempdir = os.path.dirname(f) or os.curdir +    >>> testfiles = glob.glob(os.path.join(tempdir, "test*.py")) +    >>> if not test_support.is_resource_enabled("compiler"): +    ...     testfiles = random.sample(testfiles, 10) +    ... +    >>> for testfile in testfiles: +    ...     if not roundtrip(open(testfile)): break +    ... else: True +    True  """ -# ' Emacs hint -import os, glob, random, time, sys -import re +from test import test_support +from tokenize import (tokenize, untokenize, generate_tokens, NUMBER, NAME, OP, +                     STRING, ENDMARKER, tok_name)  from io import StringIO -from test.test_support import (verbose, findfile, is_resource_enabled, -                               TestFailed) -from tokenize import (tokenize, generate_tokens, untokenize, tok_name, -                      ENDMARKER, NUMBER, NAME, OP, STRING, COMMENT) - -# How much time in seconds can pass before we print a 'Still working' message. -_PRINT_WORKING_MSG_INTERVAL = 5 * 60 - -# Test roundtrip for `untokenize`.  `f` is a file path.  The source code in f -# is tokenized, converted back to source code via tokenize.untokenize(), -# and tokenized again from the latter.  The test fails if the second -# tokenization doesn't match the first. -def test_roundtrip(f): -    ## print('Testing:', f) -    # Get the encoding first -    fobj = open(f, encoding="latin-1") -    first2lines = fobj.readline() + fobj.readline() -    fobj.close() -    m = re.search(r"coding:\s*(\S+)", first2lines) -    if m: -        encoding = m.group(1) -        ## print("    coding:", encoding) -    else: -        encoding = "utf-8" -    fobj = open(f, encoding=encoding) -    try: -        fulltok = list(generate_tokens(fobj.readline)) -    finally: -        fobj.close() - -    t1 = [tok[:2] for tok in fulltok] -    newtext = untokenize(t1) -    readline = iter(newtext.splitlines(1)).__next__ -    t2 = [tok[:2] for tok in generate_tokens(readline)] -    if t1 != t2: -        raise TestFailed("untokenize() roundtrip failed for %r" % f) +import os  def dump_tokens(s):      """Print out the tokens in s in a table format. @@ -132,7 +507,7 @@ def dump_tokens(s):          if type == ENDMARKER:              break          type = tok_name[type] -        print("%(type)-10.10s  %(token)-13.13r %(start)s %(end)s" % locals()) +        print("%(type)-10.10s %(token)-13.13r %(start)s %(end)s" % locals())  def roundtrip(s):      f = StringIO(s) @@ -177,82 +552,12 @@ def decistmt(s):              result.append((toknum, tokval))      return untokenize(result) -def test_main(): -    if verbose: -        print('starting...') - -    next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL - -    # Validate the tokenize_tests.txt file. -    # This makes sure it compiles, and displays any errors in it. -    f = open(findfile('tokenize_tests.txt')) -    sf = f.read() -    f.close() -    cf = compile(sf, 'tokenize_tests.txt', 'exec') - -    # This displays the tokenization of tokenize_tests.py to stdout, and -    # regrtest.py checks that this equals the expected output (in the -    # test/output/ directory). -    f = open(findfile('tokenize_tests.txt')) -    tokenize(f.readline) -    f.close() - -    # Now run test_roundtrip() over test_tokenize.py too, and over all -    # (if the "compiler" resource is enabled) or a small random sample (if -    # "compiler" is not enabled) of the test*.py files. -    f = findfile('test_tokenize.py') -    if verbose: -        print('    round trip: ', f, file=sys.__stdout__) -    test_roundtrip(f) - -    testdir = os.path.dirname(f) or os.curdir -    testfiles = glob.glob(testdir + os.sep + 'test*.py') -    if not is_resource_enabled('compiler'): -        testfiles = random.sample(testfiles, 10) - -    for f in testfiles: -        # Print still working message since this test can be really slow -        if verbose: -            print('    round trip: ', f, file=sys.__stdout__) -        if next_time <= time.time(): -            next_time = time.time() + _PRINT_WORKING_MSG_INTERVAL -            print('  test_main still working, be patient...', file=sys.__stdout__) -            sys.__stdout__.flush() - -        test_roundtrip(f) - -    # Test detecton of IndentationError. -    sampleBadText = """\ -def foo(): -    bar -  baz -""" - -    try: -        for tok in generate_tokens(StringIO(sampleBadText).readline): -            pass -    except IndentationError: -        pass -    else: -        raise TestFailed("Did not detect IndentationError:") - -    # Run the doctests in this module. -    from test import test_tokenize  # i.e., this module -    from test.test_support import run_doctest -    run_doctest(test_tokenize, verbose) -    if verbose: -        print('finished') +__test__ = {"doctests" : doctests, 'decistmt': decistmt} -def test_rarrow(): -    """ -    This function exists solely to test the tokenization of the RARROW -    operator. - -    >>> tokenize(iter(['->']).__next__)   #doctest: +NORMALIZE_WHITESPACE -    1,0-1,2:\tOP\t'->' -    2,0-2,0:\tENDMARKER\t'' -    """ +def test_main(): +    from test import test_tokenize +    test_support.run_doctest(test_tokenize, True)  if __name__ == "__main__":      test_main() | 
