diff options
| author | Serhiy Storchaka <storchaka@gmail.com> | 2016-05-08 23:43:50 +0300 | 
|---|---|---|
| committer | Serhiy Storchaka <storchaka@gmail.com> | 2016-05-08 23:43:50 +0300 | 
| commit | 02d9f5e5b2ee2662cb6776ebdafa2f3169452e41 (patch) | |
| tree | b5f50062b0813e8d9f6df7c63067bc21201b21e3 | |
| parent | c7cc9850d49354e9e93601d649a3c2bf60f72df8 (diff) | |
| download | cpython-git-02d9f5e5b2ee2662cb6776ebdafa2f3169452e41.tar.gz | |
Issue #26881: The modulefinder module now supports extended opcode arguments.
| mode | file | lines changed |
|---|---|---|
| -rw-r--r-- | Lib/dis.py | 55 |
| -rw-r--r-- | Lib/modulefinder.py | 45 |
| -rw-r--r-- | Lib/test/test_modulefinder.py | 13 |
| -rw-r--r-- | Misc/NEWS | 4 |

4 files changed, 61 insertions, 56 deletions
diff --git a/Lib/dis.py b/Lib/dis.py index 841208ffa1..09776fea02 100644 --- a/Lib/dis.py +++ b/Lib/dis.py @@ -275,31 +275,17 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,      """      labels = findlabels(code) -    extended_arg = 0      starts_line = None      free = None -    # enumerate() is not an option, since we sometimes process -    # multiple elements on a single pass through the loop -    n = len(code) -    i = 0 -    while i < n: -        op = code[i] -        offset = i +    for offset, op, arg in _unpack_opargs(code):          if linestarts is not None: -            starts_line = linestarts.get(i, None) +            starts_line = linestarts.get(offset, None)              if starts_line is not None:                  starts_line += line_offset -        is_jump_target = i in labels -        i = i+1 -        arg = None +        is_jump_target = offset in labels          argval = None          argrepr = '' -        if op >= HAVE_ARGUMENT: -            arg = code[i] + code[i+1]*256 + extended_arg -            extended_arg = 0 -            i = i+2 -            if op == EXTENDED_ARG: -                extended_arg = arg*65536 +        if arg is not None:              #  Set argval to the dereferenced value of the argument when              #  availabe, and argrepr to the string representation of argval.              
#    _disassemble_bytes needs the string repr of the @@ -310,7 +296,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,              elif op in hasname:                  argval, argrepr = _get_name_info(arg, names)              elif op in hasjrel: -                argval = i + arg +                argval = offset + 3 + arg                  argrepr = "to " + repr(argval)              elif op in haslocal:                  argval, argrepr = _get_name_info(arg, varnames) @@ -320,7 +306,7 @@ def _get_instructions_bytes(code, varnames=None, names=None, constants=None,              elif op in hasfree:                  argval, argrepr = _get_name_info(arg, cells)              elif op in hasnargs: -                argrepr = "%d positional, %d keyword pair" % (code[i-2], code[i-1]) +                argrepr = "%d positional, %d keyword pair" % (arg%256, arg//256)          yield Instruction(opname[op], op,                            arg, argval, argrepr,                            offset, starts_line, is_jump_target) @@ -356,26 +342,37 @@ def _disassemble_str(source, *, file=None):  disco = disassemble                     # XXX For backwards compatibility -def findlabels(code): -    """Detect all offsets in a byte code which are jump targets. - -    Return the list of offsets. 
- -    """ -    labels = [] +def _unpack_opargs(code):      # enumerate() is not an option, since we sometimes process      # multiple elements on a single pass through the loop +    extended_arg = 0      n = len(code)      i = 0      while i < n:          op = code[i] +        offset = i          i = i+1 +        arg = None          if op >= HAVE_ARGUMENT: -            arg = code[i] + code[i+1]*256 +            arg = code[i] + code[i+1]*256 + extended_arg +            extended_arg = 0              i = i+2 +            if op == EXTENDED_ARG: +                extended_arg = arg*65536 +        yield (offset, op, arg) + +def findlabels(code): +    """Detect all offsets in a byte code which are jump targets. + +    Return the list of offsets. + +    """ +    labels = [] +    for offset, op, arg in _unpack_opargs(code): +        if arg is not None:              label = -1              if op in hasjrel: -                label = i+arg +                label = offset + 3 + arg              elif op in hasjabs:                  label = arg              if label >= 0: diff --git a/Lib/modulefinder.py b/Lib/modulefinder.py index 50f2462da0..b8cce1f766 100644 --- a/Lib/modulefinder.py +++ b/Lib/modulefinder.py @@ -13,13 +13,12 @@ with warnings.catch_warnings():      warnings.simplefilter('ignore', PendingDeprecationWarning)      import imp -# XXX Clean up once str8's cstor matches bytes. 
-LOAD_CONST = bytes([dis.opname.index('LOAD_CONST')]) -IMPORT_NAME = bytes([dis.opname.index('IMPORT_NAME')]) -STORE_NAME = bytes([dis.opname.index('STORE_NAME')]) -STORE_GLOBAL = bytes([dis.opname.index('STORE_GLOBAL')]) -STORE_OPS = [STORE_NAME, STORE_GLOBAL] -HAVE_ARGUMENT = bytes([dis.HAVE_ARGUMENT]) +LOAD_CONST = dis.opmap['LOAD_CONST'] +IMPORT_NAME = dis.opmap['IMPORT_NAME'] +STORE_NAME = dis.opmap['STORE_NAME'] +STORE_GLOBAL = dis.opmap['STORE_GLOBAL'] +STORE_OPS = STORE_NAME, STORE_GLOBAL +EXTENDED_ARG = dis.EXTENDED_ARG  # Modulefinder does a good job at simulating Python's, but it can not  # handle __path__ modifications packages make at runtime.  Therefore there @@ -337,38 +336,30 @@ class ModuleFinder:                          fullname = name + "." + sub                          self._add_badmodule(fullname, caller) -    def scan_opcodes_25(self, co, -                     unpack = struct.unpack): +    def scan_opcodes(self, co):          # Scan the code, and yield 'interesting' opcode combinations -        # Python 2.5 version (has absolute and relative imports)          code = co.co_code          names = co.co_names          consts = co.co_consts -        LOAD_LOAD_AND_IMPORT = LOAD_CONST + LOAD_CONST + IMPORT_NAME -        while code: -            c = bytes([code[0]]) -            if c in STORE_OPS: -                oparg, = unpack('<H', code[1:3]) +        opargs = [(op, arg) for _, op, arg in dis._unpack_opargs(code) +                  if op != EXTENDED_ARG] +        for i, (op, oparg) in enumerate(opargs): +            if op in STORE_OPS:                  yield "store", (names[oparg],) -                code = code[3:]                  continue -            if code[:9:3] == LOAD_LOAD_AND_IMPORT: -                oparg_1, oparg_2, oparg_3 = unpack('<xHxHxH', code[:9]) -                level = consts[oparg_1] +            if (op == IMPORT_NAME and i >= 2 +                    and opargs[i-1][0] == opargs[i-2][0] == LOAD_CONST): +                level = 
consts[opargs[i-2][1]] +                fromlist = consts[opargs[i-1][1]]                  if level == 0: # absolute import -                    yield "absolute_import", (consts[oparg_2], names[oparg_3]) +                    yield "absolute_import", (fromlist, names[oparg])                  else: # relative import -                    yield "relative_import", (level, consts[oparg_2], names[oparg_3]) -                code = code[9:] +                    yield "relative_import", (level, fromlist, names[oparg])                  continue -            if c >= HAVE_ARGUMENT: -                code = code[3:] -            else: -                code = code[1:]      def scan_code(self, co, m):          code = co.co_code -        scanner = self.scan_opcodes_25 +        scanner = self.scan_opcodes          for what, args in scanner(co):              if what == "store":                  name, = args diff --git a/Lib/test/test_modulefinder.py b/Lib/test/test_modulefinder.py index 4c49e9aeaf..e4df2a90d4 100644 --- a/Lib/test/test_modulefinder.py +++ b/Lib/test/test_modulefinder.py @@ -319,6 +319,19 @@ class ModuleFinderTest(unittest.TestCase):          expected = "co_filename %r changed to %r" % (old_path, new_path)          self.assertIn(expected, output) +    def test_extended_opargs(self): +        extended_opargs_test = [ +            "a", +            ["a", "b"], +            [], [], +            """\ +a.py +                                %r +                                import b +b.py +""" % list(range(2**16))]  # 2**16 constants +        self._do_test(extended_opargs_test) +  if __name__ == "__main__":      unittest.main() @@ -116,6 +116,8 @@ Core and Builtins  Library  ------- +- Issue #26881: The modulefinder module now supports extended opcode arguments. +  - Issue #23815: Fixed crashes related to directly created instances of types in    _tkinter and curses.panel modules. 
@@ -125,6 +127,8 @@ Library  - Issue #26873: xmlrpc now raises ResponseError on unsupported type tags    instead of silently return incorrect result. +- Issue #26881: modulefinder now works with bytecode with extended args. +  - Issue #26711: Fixed the comparison of plistlib.Data with other types.  - Issue #24114: Fix an uninitialized variable in `ctypes.util`.  | 
