diff options
Diffstat (limited to 'coverage/phystokens.py')
-rw-r--r-- | coverage/phystokens.py | 23 |
1 files changed, 15 insertions, 8 deletions
diff --git a/coverage/phystokens.py b/coverage/phystokens.py index 2862490f..60b87932 100644 --- a/coverage/phystokens.py +++ b/coverage/phystokens.py @@ -5,16 +5,17 @@ from coverage.backward import StringIO # pylint: disable-msg=W0622 def phys_tokens(toks): """Return all physical tokens, even line continuations. - + tokenize.generate_tokens() doesn't return a token for the backslash that continues lines. This wrapper provides those tokens so that we can re-create a faithful representation of the original source. - + Returns the same values as generate_tokens() - + """ last_line = None last_lineno = -1 + last_ttype = None for ttype, ttext, (slineno, scol), (elineno, ecol), ltext in toks: if last_lineno != elineno: if last_line and last_line[-2:] == "\\\n": @@ -34,7 +35,11 @@ def phys_tokens(toks): # so we need to figure out if the backslash is already in the # string token or not. inject_backslash = True - if ttype == token.STRING: + if last_ttype == tokenize.COMMENT: + # Comments like this \ + # should never result in a new token. + inject_backslash = False + elif ttype == token.STRING: if "\n" in ttext and ttext.split('\n', 1)[0][-1] == '\\': # It's a multiline string and the first line ends with # a backslash, so we don't need to inject another. @@ -49,19 +54,20 @@ def phys_tokens(toks): last_line ) last_line = ltext + last_ttype = ttype yield ttype, ttext, (slineno, scol), (elineno, ecol), ltext last_lineno = elineno def source_token_lines(source): """Generate a series of lines, one for each line in `source`. - + Each line is a list of pairs, each pair is a token:: - + [('key', 'def'), ('ws', ' '), ('nam', 'hello'), ('op', '('), ... ] Each pair has a token class, and the token text. - + If you concatenate all the token texts, and then join them with newlines, you should have your original `source` back, with two differences: trailing whitespace is not preserved, and a final line with no newline @@ -71,7 +77,8 @@ def source_token_lines(source): ws_tokens = [token.INDENT, token.DEDENT, token.NEWLINE, tokenize.NL] line = [] col = 0 - tokgen = tokenize.generate_tokens(StringIO(source.expandtabs(8)).readline) + source = source.expandtabs(8).replace('\r\n', '\n') + tokgen = tokenize.generate_tokens(StringIO(source).readline) for ttype, ttext, (_, scol), (_, ecol), _ in phys_tokens(tokgen): mark_start = True for part in re.split('(\n)', ttext): |