summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
-rw-r--r-- Lib/dis.py | 5
-rw-r--r-- Lib/test/test_peepholer.py | 69
-rw-r--r-- Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst | 1
-rw-r--r-- Objects/lnotab_notes.txt | 4
4 files changed, 77 insertions, 2 deletions
diff --git a/Lib/dis.py b/Lib/dis.py
index a25fb2b417..10e5f7fb08 100644
--- a/Lib/dis.py
+++ b/Lib/dis.py
@@ -454,6 +454,7 @@ def findlinestarts(code):
"""
byte_increments = code.co_lnotab[0::2]
line_increments = code.co_lnotab[1::2]
+ bytecode_len = len(code.co_code)
lastlineno = None
lineno = code.co_firstlineno
@@ -464,6 +465,10 @@ def findlinestarts(code):
yield (addr, lineno)
lastlineno = lineno
addr += byte_incr
+ if addr >= bytecode_len:
+ # The rest of the lnotab byte offsets are past the end of
+ # the bytecode, so the lines were optimized away.
+ return
if line_incr >= 0x80:
# line_increments is an array of 8-bit signed integers
line_incr -= 0x100
diff --git a/Lib/test/test_peepholer.py b/Lib/test/test_peepholer.py
index 47dee33076..23cc36c605 100644
--- a/Lib/test/test_peepholer.py
+++ b/Lib/test/test_peepholer.py
@@ -40,6 +40,20 @@ class TestTranforms(BytecodeTestCase):
self.fail(f'{instr.opname} at {instr.offset} '
f'jumps to {tgt.opname} at {tgt.offset}')
+ def check_lnotab(self, code):
+ "Check that the lnotab byte offsets are sensible."
+ code = dis._get_code_object(code)
+ lnotab = list(dis.findlinestarts(code))
+ # Don't bother checking if the line info is sensible, because
+ # most of the line info we can get at comes from lnotab.
+ min_bytecode = min(t[0] for t in lnotab)
+ max_bytecode = max(t[0] for t in lnotab)
+ self.assertGreaterEqual(min_bytecode, 0)
+ self.assertLess(max_bytecode, len(code.co_code))
+ # This could conceivably test more (and probably should, as there
+ # aren't very many tests of lnotab), if peepholer wasn't scheduled
+ # to be replaced anyway.
+
def test_unot(self):
# UNARY_NOT POP_JUMP_IF_FALSE --> POP_JUMP_IF_TRUE'
def unot(x):
@@ -48,6 +62,7 @@ class TestTranforms(BytecodeTestCase):
self.assertNotInBytecode(unot, 'UNARY_NOT')
self.assertNotInBytecode(unot, 'POP_JUMP_IF_FALSE')
self.assertInBytecode(unot, 'POP_JUMP_IF_TRUE')
+ self.check_lnotab(unot)
def test_elim_inversion_of_is_or_in(self):
for line, cmp_op in (
@@ -58,6 +73,7 @@ class TestTranforms(BytecodeTestCase):
):
code = compile(line, '', 'single')
self.assertInBytecode(code, 'COMPARE_OP', cmp_op)
+ self.check_lnotab(code)
def test_global_as_constant(self):
# LOAD_GLOBAL None/True/False --> LOAD_CONST None/True/False
@@ -75,6 +91,7 @@ class TestTranforms(BytecodeTestCase):
for func, elem in ((f, None), (g, True), (h, False)):
self.assertNotInBytecode(func, 'LOAD_GLOBAL')
self.assertInBytecode(func, 'LOAD_CONST', elem)
+ self.check_lnotab(func)
def f():
'Adding a docstring made this test fail in Py2.5.0'
@@ -82,6 +99,7 @@ class TestTranforms(BytecodeTestCase):
self.assertNotInBytecode(f, 'LOAD_GLOBAL')
self.assertInBytecode(f, 'LOAD_CONST', None)
+ self.check_lnotab(f)
def test_while_one(self):
# Skip over: LOAD_CONST trueconst POP_JUMP_IF_FALSE xx
@@ -93,6 +111,7 @@ class TestTranforms(BytecodeTestCase):
self.assertNotInBytecode(f, elem)
for elem in ('JUMP_ABSOLUTE',):
self.assertInBytecode(f, elem)
+ self.check_lnotab(f)
def test_pack_unpack(self):
for line, elem in (
@@ -104,6 +123,7 @@ class TestTranforms(BytecodeTestCase):
self.assertInBytecode(code, elem)
self.assertNotInBytecode(code, 'BUILD_TUPLE')
self.assertNotInBytecode(code, 'UNPACK_TUPLE')
+ self.check_lnotab(code)
def test_folding_of_tuples_of_constants(self):
for line, elem in (
@@ -116,6 +136,7 @@ class TestTranforms(BytecodeTestCase):
code = compile(line,'','single')
self.assertInBytecode(code, 'LOAD_CONST', elem)
self.assertNotInBytecode(code, 'BUILD_TUPLE')
+ self.check_lnotab(code)
# Long tuples should be folded too.
code = compile(repr(tuple(range(10000))),'','single')
@@ -124,6 +145,7 @@ class TestTranforms(BytecodeTestCase):
load_consts = [instr for instr in dis.get_instructions(code)
if instr.opname == 'LOAD_CONST']
self.assertEqual(len(load_consts), 2)
+ self.check_lnotab(code)
# Bug 1053819: Tuple of constants misidentified when presented with:
# . . . opcode_with_arg 100 unary_opcode BUILD_TUPLE 1 . . .
@@ -141,6 +163,7 @@ class TestTranforms(BytecodeTestCase):
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
0, 1, 2, 3, 4, 5, 6, 7, 8, 9,
],)
+ self.check_lnotab(crater)
def test_folding_of_lists_of_constants(self):
for line, elem in (
@@ -153,6 +176,7 @@ class TestTranforms(BytecodeTestCase):
code = compile(line, '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', elem)
self.assertNotInBytecode(code, 'BUILD_LIST')
+ self.check_lnotab(code)
def test_folding_of_sets_of_constants(self):
for line, elem in (
@@ -166,6 +190,7 @@ class TestTranforms(BytecodeTestCase):
code = compile(line, '', 'single')
self.assertNotInBytecode(code, 'BUILD_SET')
self.assertInBytecode(code, 'LOAD_CONST', elem)
+ self.check_lnotab(code)
# Ensure that the resulting code actually works:
def f(a):
@@ -176,9 +201,11 @@ class TestTranforms(BytecodeTestCase):
self.assertTrue(f(3))
self.assertTrue(not f(4))
+ self.check_lnotab(f)
self.assertTrue(not g(3))
self.assertTrue(g(4))
+ self.check_lnotab(g)
def test_folding_of_binops_on_constants(self):
@@ -203,41 +230,50 @@ class TestTranforms(BytecodeTestCase):
self.assertInBytecode(code, 'LOAD_CONST', elem)
for instr in dis.get_instructions(code):
self.assertFalse(instr.opname.startswith('BINARY_'))
+ self.check_lnotab(code)
# Verify that unfoldables are skipped
code = compile('a=2+"b"', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 2)
self.assertInBytecode(code, 'LOAD_CONST', 'b')
+ self.check_lnotab(code)
# Verify that large sequences do not result from folding
code = compile('a="x"*10000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 10000)
self.assertNotIn("x"*10000, code.co_consts)
+ self.check_lnotab(code)
code = compile('a=1<<1000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 1000)
self.assertNotIn(1<<1000, code.co_consts)
+ self.check_lnotab(code)
code = compile('a=2**1000', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 1000)
self.assertNotIn(2**1000, code.co_consts)
+ self.check_lnotab(code)
def test_binary_subscr_on_unicode(self):
# valid code get optimized
code = compile('"foo"[0]', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', 'f')
self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+ self.check_lnotab(code)
code = compile('"\u0061\uffff"[1]', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', '\uffff')
self.assertNotInBytecode(code,'BINARY_SUBSCR')
+ self.check_lnotab(code)
# With PEP 393, non-BMP char get optimized
code = compile('"\U00012345"[0]', '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', '\U00012345')
self.assertNotInBytecode(code, 'BINARY_SUBSCR')
+ self.check_lnotab(code)
# invalid code doesn't get optimized
# out of range
code = compile('"fuu"[10]', '', 'single')
self.assertInBytecode(code, 'BINARY_SUBSCR')
+ self.check_lnotab(code)
def test_folding_of_unaryops_on_constants(self):
for line, elem in (
@@ -252,13 +288,15 @@ class TestTranforms(BytecodeTestCase):
self.assertInBytecode(code, 'LOAD_CONST', elem)
for instr in dis.get_instructions(code):
self.assertFalse(instr.opname.startswith('UNARY_'))
+ self.check_lnotab(code)
# Check that -0.0 works after marshaling
def negzero():
return -(1.0-1.0)
- for instr in dis.get_instructions(code):
+ for instr in dis.get_instructions(negzero):
self.assertFalse(instr.opname.startswith('UNARY_'))
+ self.check_lnotab(negzero)
# Verify that unfoldables are skipped
for line, elem, opname in (
@@ -268,6 +306,7 @@ class TestTranforms(BytecodeTestCase):
code = compile(line, '', 'single')
self.assertInBytecode(code, 'LOAD_CONST', elem)
self.assertInBytecode(code, opname)
+ self.check_lnotab(code)
def test_elim_extra_return(self):
# RETURN LOAD_CONST None RETURN --> RETURN
@@ -277,6 +316,7 @@ class TestTranforms(BytecodeTestCase):
returns = [instr for instr in dis.get_instructions(f)
if instr.opname == 'RETURN_VALUE']
self.assertEqual(len(returns), 1)
+ self.check_lnotab(f)
def test_elim_jump_to_return(self):
# JUMP_FORWARD to RETURN --> RETURN
@@ -290,6 +330,7 @@ class TestTranforms(BytecodeTestCase):
returns = [instr for instr in dis.get_instructions(f)
if instr.opname == 'RETURN_VALUE']
self.assertEqual(len(returns), 2)
+ self.check_lnotab(f)
def test_elim_jump_to_uncond_jump(self):
# POP_JUMP_IF_FALSE to JUMP_FORWARD --> POP_JUMP_IF_FALSE to non-jump
@@ -302,6 +343,7 @@ class TestTranforms(BytecodeTestCase):
else:
baz()
self.check_jump_targets(f)
+ self.check_lnotab(f)
def test_elim_jump_to_uncond_jump2(self):
# POP_JUMP_IF_FALSE to JUMP_ABSOLUTE --> POP_JUMP_IF_FALSE to non-jump
@@ -312,6 +354,7 @@ class TestTranforms(BytecodeTestCase):
or d):
a = foo()
self.check_jump_targets(f)
+ self.check_lnotab(f)
def test_elim_jump_to_uncond_jump3(self):
# Intentionally use two-line expressions to test issue37213.
@@ -320,18 +363,21 @@ class TestTranforms(BytecodeTestCase):
return ((a and b)
and c)
self.check_jump_targets(f)
+ self.check_lnotab(f)
self.assertEqual(count_instr_recursively(f, 'JUMP_IF_FALSE_OR_POP'), 2)
# JUMP_IF_TRUE_OR_POP to JUMP_IF_TRUE_OR_POP --> JUMP_IF_TRUE_OR_POP to non-jump
def f(a, b, c):
return ((a or b)
or c)
self.check_jump_targets(f)
+ self.check_lnotab(f)
self.assertEqual(count_instr_recursively(f, 'JUMP_IF_TRUE_OR_POP'), 2)
# JUMP_IF_FALSE_OR_POP to JUMP_IF_TRUE_OR_POP --> POP_JUMP_IF_FALSE to non-jump
def f(a, b, c):
return ((a and b)
or c)
self.check_jump_targets(f)
+ self.check_lnotab(f)
self.assertNotInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
self.assertInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
self.assertInBytecode(f, 'POP_JUMP_IF_FALSE')
@@ -340,6 +386,7 @@ class TestTranforms(BytecodeTestCase):
return ((a or b)
and c)
self.check_jump_targets(f)
+ self.check_lnotab(f)
self.assertNotInBytecode(f, 'JUMP_IF_TRUE_OR_POP')
self.assertInBytecode(f, 'JUMP_IF_FALSE_OR_POP')
self.assertInBytecode(f, 'POP_JUMP_IF_TRUE')
@@ -360,6 +407,7 @@ class TestTranforms(BytecodeTestCase):
returns = [instr for instr in dis.get_instructions(f)
if instr.opname == 'RETURN_VALUE']
self.assertLessEqual(len(returns), 6)
+ self.check_lnotab(f)
def test_elim_jump_after_return2(self):
# Eliminate dead code: jumps immediately after returns can't be reached
@@ -374,6 +422,7 @@ class TestTranforms(BytecodeTestCase):
returns = [instr for instr in dis.get_instructions(f)
if instr.opname == 'RETURN_VALUE']
self.assertLessEqual(len(returns), 2)
+ self.check_lnotab(f)
def test_make_function_doesnt_bail(self):
def f():
@@ -381,6 +430,7 @@ class TestTranforms(BytecodeTestCase):
pass
return g
self.assertNotInBytecode(f, 'BINARY_ADD')
+ self.check_lnotab(f)
def test_constant_folding(self):
# Issue #11244: aggressive constant folding.
@@ -401,17 +451,20 @@ class TestTranforms(BytecodeTestCase):
self.assertFalse(instr.opname.startswith('UNARY_'))
self.assertFalse(instr.opname.startswith('BINARY_'))
self.assertFalse(instr.opname.startswith('BUILD_'))
+ self.check_lnotab(code)
def test_in_literal_list(self):
def containtest():
return x in [a, b]
self.assertEqual(count_instr_recursively(containtest, 'BUILD_LIST'), 0)
+ self.check_lnotab(containtest)
def test_iterate_literal_list(self):
def forloop():
for x in [a, b]:
pass
self.assertEqual(count_instr_recursively(forloop, 'BUILD_LIST'), 0)
+ self.check_lnotab(forloop)
def test_condition_with_binop_with_bools(self):
def f():
@@ -419,6 +472,7 @@ class TestTranforms(BytecodeTestCase):
return 1
return 0
self.assertEqual(f(), 1)
+ self.check_lnotab(f)
def test_if_with_if_expression(self):
# Check bpo-37289
@@ -427,6 +481,19 @@ class TestTranforms(BytecodeTestCase):
return True
return False
self.assertTrue(f(True))
+ self.check_lnotab(f)
+
+ def test_trailing_nops(self):
+ # Check the lnotab of a function that even after trivial
+ # optimization has trailing nops, which the lnotab adjustment has to
+ # handle properly (bpo-38115).
+ def f(x):
+ while 1:
+ return 3
+ while 1:
+ return 5
+ return 6
+ self.check_lnotab(f)
class TestBuglets(unittest.TestCase):
diff --git a/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst
new file mode 100644
index 0000000000..5119c0546e
--- /dev/null
+++ b/Misc/NEWS.d/next/Library/2019-09-13-09-24-58.bpo-38115.BOO-Y1.rst
@@ -0,0 +1 @@
+Fix a bug in dis.findlinestarts() where it would return invalid bytecode offsets. Document that a code object's co_lnotab can contain invalid bytecode offsets.
\ No newline at end of file
diff --git a/Objects/lnotab_notes.txt b/Objects/lnotab_notes.txt
index 3dab2b9866..71a2979718 100644
--- a/Objects/lnotab_notes.txt
+++ b/Objects/lnotab_notes.txt
@@ -3,7 +3,9 @@ All about co_lnotab, the line number table.
Code objects store a field named co_lnotab. This is an array of unsigned bytes
disguised as a Python bytes object. It is used to map bytecode offsets to
source code line #s for tracebacks and to identify line number boundaries for
-line tracing.
+line tracing. Because of internals of the peephole optimizer, it's possible
+for lnotab to contain bytecode offsets that are no longer valid (for example
+if the optimizer removed the last line in a function).
The array is conceptually a compressed list of
(bytecode offset increment, line number increment)