"""Code parsing for Coverage."""

import re, sys, token, tokenize, types

from coverage.misc import nice_pair, CoverageException
from coverage.backward import set, StringIO   # pylint: disable-msg=W0622


class CodeParser:
    """Parse code to find executable lines, excluded lines, etc."""
    
    def __init__(self):
        self.show_tokens = False

        # The text lines of the parsed code.
        self.lines = None

        # The line numbers of excluded lines of code.
        self.excluded = set()
        
        # The line numbers of docstring lines.
        self.docstrings = set()
        
        # A dict mapping line numbers to (lo,hi) for multi-line statements.
        self.multiline = {}
        
        # The line numbers that start statements.
        self.statement_starts = set()

        self.bytes_lines = []
        
    # Getting numbers from the lnotab value changed in Py3.0.    
    if sys.hexversion >= 0x03000000:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab bytes in 3.x"""
            return list(lnotab)
    else:
        def _lnotab_increments(self, lnotab):
            """Return a list of ints from the lnotab string in 2.x"""
            return [ord(c) for c in lnotab]

    def _find_statement_starts(self, code):
        """Find the starts of statements in compiled code.
    
        Uses co_lnotab described in Python/compile.c to find line numbers that
        start statements, adding them to `self.statement_starts`.
    
        """
        # Adapted from dis.py in the standard library.
        byte_increments = self._lnotab_increments(code.co_lnotab[0::2])
        line_increments = self._lnotab_increments(code.co_lnotab[1::2])
    
        last_line_num = None
        line_num = code.co_firstlineno
        byte_num = 0
        for byte_incr, line_incr in zip(byte_increments, line_increments):
            if byte_incr:
                if line_num != last_line_num:
                    self.bytes_lines.append((byte_num, line_num))
                    self.statement_starts.add(line_num)
                    last_line_num = line_num
                byte_num += byte_incr
            line_num += line_incr
        if line_num != last_line_num:
            self.bytes_lines.append((byte_num, line_num))
            self.statement_starts.add(line_num)

    def _find_statements(self, code):
        """Find the statements in `code`.
        
        Update `self.statement_starts`, a set of line numbers that start
        statements.  Recurses into all code objects reachable from `code`.
        
        """
        # Adapted from trace.py in the standard library.

        # Get all of the lineno information from this code.
        self._find_statement_starts(code)
    
        # Check the constants for references to other code objects.
        for c in code.co_consts:
            if isinstance(c, types.CodeType):
                # Found another code object, so recurse into it.
                self._find_statements(c)

    def _raw_parse(self, text=None, filename=None, exclude=None):
        """Parse `text` to find the interesting facts about its lines.
        
        A handful of member fields are updated.
        
        """
        if not text:
            sourcef = open(filename, 'rU')
            text = sourcef.read()
            sourcef.close()
        text = text.replace('\r\n', '\n')
        self.lines = text.split('\n')

        # Find lines which match an exclusion pattern.
        if exclude:
            re_exclude = re.compile(exclude)
            for i, ltext in enumerate(self.lines):
                if re_exclude.search(ltext):
                    self.excluded.add(i+1)
    
        # Tokenize, to find excluded suites, to find docstrings, and to find
        # multi-line statements.
        indent = 0
        exclude_indent = 0
        excluding = False
        prev_toktype = token.INDENT
        first_line = None

        tokgen = tokenize.generate_tokens(StringIO(text).readline)
        for toktype, ttext, (slineno, _), (elineno, _), ltext in tokgen:
            if self.show_tokens:
                print("%10s %5s %-20r %r" % (
                    tokenize.tok_name.get(toktype, toktype),
                    nice_pair((slineno, elineno)), ttext, ltext
                    ))
            if toktype == token.INDENT:
                indent += 1
            elif toktype == token.DEDENT:
                indent -= 1
            elif toktype == token.OP and ttext == ':':
                if not excluding and elineno in self.excluded:
                    # Start excluding a suite.  We trigger off of the colon
                    # token so that the #pragma comment will be recognized on
                    # the same line as the colon.
                    exclude_indent = indent
                    excluding = True
            elif toktype == token.STRING and prev_toktype == token.INDENT:
                # Strings that are first on an indented line are docstrings.
                # (a trick from trace.py in the stdlib.)
                for i in range(slineno, elineno+1):
                    self.docstrings.add(i)
            elif toktype == token.NEWLINE:
                if first_line is not None and elineno != first_line:
                    # We're at the end of a line, and we've ended on a
                    # different line than the first line of the statement,
                    # so record a multi-line range.
                    rng = (first_line, elineno)
                    for l in range(first_line, elineno+1):
                        self.multiline[l] = rng
                first_line = None
                
            if ttext.strip() and toktype != tokenize.COMMENT:
                # A non-whitespace token.
                if first_line is None:
                    # The token is not whitespace, and is the first in a
                    # statement.
                    first_line = slineno
                    # Check whether to end an excluded suite.
                    if excluding and indent <= exclude_indent:
                        excluding = False
                    if excluding:
                        self.excluded.add(elineno)
                        
            prev_toktype = toktype

        # Find the starts of the executable statements.
        filename = filename or "<code>"
        try:
            # Python 2.3 and 2.4 don't like partial last lines, so be sure the
            # text ends nicely for them.
            text += '\n'
            code = compile(text, filename, "exec")
        except SyntaxError:
            _, synerr, _ = sys.exc_info()
            raise CoverageException(
                "Couldn't parse '%s' as Python source: '%s' at line %d" %
                    (filename, synerr.msg, synerr.lineno)
                )

        self._find_statements(code)

    def _map_to_first_line(self, lines, ignore=None):
        """Map the line numbers in `lines` to the correct first line of the
        statement.
        
        Skip any line mentioned in `ignore`.
        
        Returns a sorted list of the first lines.
        
        """
        ignore = ignore or []
        lset = set()
        for l in lines:
            if l in ignore:
                continue
            rng = self.multiline.get(l)
            if rng:
                new_l = rng[0]
            else:
                new_l = l
            if new_l not in ignore:
                lset.add(new_l)
        lines = list(lset)
        lines.sort()
        return lines
    
    def parse_source(self, text=None, filename=None, exclude=None):
        """Parse source text to find executable lines, excluded lines, etc.
        
        Source can be provided as `text`, the text itself, or `filename`, from
        which text will be read.  Excluded lines are those that match `exclude`,
        a regex.
        
        Return values are 1) a sorted list of executable line numbers,
        2) a sorted list of excluded line numbers, and 3) a dict mapping line
        numbers to pairs (lo,hi) for multi-line statements.
        
        """
        self._raw_parse(text, filename, exclude)
        
        excluded_lines = self._map_to_first_line(self.excluded)
        ignore = excluded_lines + list(self.docstrings)
        lines = self._map_to_first_line(self.statement_starts, ignore)
    
        return lines, excluded_lines, self.multiline

    def _find_byte_chunks(self, code):
        import opcode
        
        code = code.co_code
        #labels = findlabels(code)
        #linestarts = dict(findlinestarts(co))
        n = len(code)
        i = 0
        extended_arg = 0
        free = None
        while i < n:
            c = code[i]
            op = ord(c)
            print repr(i).rjust(4),
            print opcode.opname[op].ljust(20)
            i = i+1
            if op >= opcode.HAVE_ARGUMENT:
                oparg = ord(code[i]) + ord(code[i+1])*256 + extended_arg
                extended_arg = 0
                i = i+2
                if op == opcode.EXTENDED_ARG:
                    extended_arg = oparg*65536L
        
    def _disassemble(self, code):
        """Disassemble code, for ad-hoc experimenting."""
        
        import dis
        dis.dis(code)

        for c in code.co_consts:
            if isinstance(c, types.CodeType):
                # Found another code object, so recurse into it.
                print("\n%s:" % c)
                self._disassemble(c)

        print("")
        
    def adhoc_main(self, args):
        """A main function for trying the code from the command line."""

        from optparse import OptionParser

        parser = OptionParser()
        parser.add_option(
            "-c", action="store_true", dest="chunks", help="Show byte chunks"
            )
        parser.add_option(
            "-d", action="store_true", dest="dis", help="Disassemble"
            )
        parser.add_option(
            "-t", action="store_true", dest="tokens", help="Show tokens"
            )
        
        options, args = parser.parse_args()
        filename = args[0]

        if options.dis or options.chunks:        
            source = open(filename).read()
            code = compile(source, filename, "exec")

        if options.dis:
            print("Main code:")
            self._disassemble(code)

        if options.chunks:
            self._find_byte_chunks(code)

        self.show_tokens = options.tokens
        self._raw_parse(filename=filename, exclude=r"no\s*cover")

        print self.bytes_lines
        for i, ltext in enumerate(self.lines):
            lineno = i+1
            m0 = m1 = m2 = ' '
            if lineno in self.statement_starts:
                m0 = '-'
            if lineno in self.docstrings:
                m1 = '"'
            if lineno in self.excluded:
                m2 = 'x'
            print("%4d %s%s%s %s" % (lineno, m0, m1, m2, ltext))


if __name__ == '__main__':
    CodeParser().adhoc_main(sys.argv[1:])