diff options
author | Jeffrey Yasskin <jyasskin@gmail.com> | 2009-02-28 19:03:21 +0000 |
---|---|---|
committer | Jeffrey Yasskin <jyasskin@gmail.com> | 2009-02-28 19:03:21 +0000 |
commit | 68d68520061a5a4aa3437f8c74ba383b2da50280 (patch) | |
tree | aaade0afac9b9369b58c5642e2635fa319f61c02 /Python | |
parent | de28d6841e60d75ce7644ca527448f73376ec48e (diff) | |
download | cpython-git-68d68520061a5a4aa3437f8c74ba383b2da50280.tar.gz |
Backport r69961 to trunk, replacing JUMP_IF_{TRUE,FALSE} with
POP_JUMP_IF_{TRUE,FALSE} and JUMP_IF_{TRUE,FALSE}_OR_POP. This avoids executing
a POP_TOP on each conditional and sometimes allows the peephole optimizer to
skip a JUMP_ABSOLUTE entirely. It speeds up list comprehensions significantly.
Diffstat (limited to 'Python')
-rw-r--r-- | Python/ceval.c | 84 | ||||
-rw-r--r-- | Python/compile.c | 86 | ||||
-rw-r--r-- | Python/import.c | 4 | ||||
-rw-r--r-- | Python/peephole.c | 96 |
4 files changed, 164 insertions, 106 deletions
diff --git a/Python/ceval.c b/Python/ceval.c index 419ecfaaaa..88b9d0e888 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -764,8 +764,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) /* OpCode prediction macros Some opcodes tend to come in pairs thus making it possible to predict the second code when the first is run. For example, - COMPARE_OP is often followed by JUMP_IF_FALSE or JUMP_IF_TRUE. And, - those opcodes are often followed by a POP_TOP. + GET_ITER is often followed by FOR_ITER. And FOR_ITER is often + followed by STORE_FAST or UNPACK_SEQUENCE. Verifying the prediction costs a single high-speed test of a register variable against a constant. If the pairing was good, then the @@ -1110,7 +1110,6 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) SETLOCAL(oparg, v); goto fast_next_opcode; - PREDICTED(POP_TOP); case POP_TOP: v = POP(); Py_DECREF(v); @@ -2220,8 +2219,8 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) Py_DECREF(w); SET_TOP(x); if (x == NULL) break; - PREDICT(JUMP_IF_FALSE); - PREDICT(JUMP_IF_TRUE); + PREDICT(POP_JUMP_IF_FALSE); + PREDICT(POP_JUMP_IF_TRUE); continue; case IMPORT_NAME: @@ -2298,41 +2297,45 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) JUMPBY(oparg); goto fast_next_opcode; - PREDICTED_WITH_ARG(JUMP_IF_FALSE); - case JUMP_IF_FALSE: - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_FALSE); + case POP_JUMP_IF_FALSE: + w = POP(); if (w == Py_True) { - PREDICT(POP_TOP); + Py_DECREF(w); goto fast_next_opcode; } if (w == Py_False) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); goto fast_next_opcode; } err = PyObject_IsTrue(w); + Py_DECREF(w); if (err > 0) err = 0; else if (err == 0) - JUMPBY(oparg); + JUMPTO(oparg); else break; continue; - PREDICTED_WITH_ARG(JUMP_IF_TRUE); - case JUMP_IF_TRUE: - w = TOP(); + PREDICTED_WITH_ARG(POP_JUMP_IF_TRUE); + case POP_JUMP_IF_TRUE: + w = POP(); if (w == Py_False) { - PREDICT(POP_TOP); + Py_DECREF(w); goto fast_next_opcode; } if (w == Py_True) { - JUMPBY(oparg); + Py_DECREF(w); + JUMPTO(oparg); goto fast_next_opcode; } err = PyObject_IsTrue(w); + Py_DECREF(w); if (err > 0) { err = 0; - JUMPBY(oparg); + JUMPTO(oparg); } else if (err == 0) ; @@ -2340,6 +2343,53 @@ PyEval_EvalFrameEx(PyFrameObject *f, int throwflag) break; continue; + case JUMP_IF_FALSE_OR_POP: + w = TOP(); + if (w == Py_True) { + STACKADJ(-1); + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_False) { + JUMPTO(oparg); + goto fast_next_opcode; + } + err = PyObject_IsTrue(w); + if (err > 0) { + STACKADJ(-1); + Py_DECREF(w); + err = 0; + } + else if (err == 0) + JUMPTO(oparg); + else + break; + continue; + + case JUMP_IF_TRUE_OR_POP: + w = TOP(); + if (w == Py_False) { + STACKADJ(-1); + Py_DECREF(w); + goto fast_next_opcode; + } + if (w == Py_True) { + JUMPTO(oparg); + goto fast_next_opcode; + } + err = PyObject_IsTrue(w); + if (err > 0) { + err = 0; + JUMPTO(oparg); + } + else if (err == 0) { + STACKADJ(-1); + Py_DECREF(w); + } + else + break; + continue; + PREDICTED_WITH_ARG(JUMP_ABSOLUTE); case JUMP_ABSOLUTE: JUMPTO(oparg); diff --git a/Python/compile.c b/Python/compile.c index a252bd15f1..69321ae0c4 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -836,11 +836,15 @@ opcode_stack_effect(int opcode, int oparg) return 1; case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_TRUE_OR_POP: /* -1 if jump not taken */ + case JUMP_IF_FALSE_OR_POP: /* "" */ case JUMP_ABSOLUTE: return 0; + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + return -1; + case LOAD_GLOBAL: return 1; @@ -1499,12 +1503,10 @@ compiler_ifexp(struct compiler *c, expr_ty e) if (next == NULL) return 0; VISIT(c, expr, e->v.IfExp.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT(c, expr, e->v.IfExp.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); VISIT(c, expr, e->v.IfExp.orelse); compiler_use_next_block(c, end); return 1; @@ -1597,9 +1599,6 @@ compiler_if(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - next = compiler_new_block(c); - if (next == NULL) - return 0; constant = expr_constant(s->v.If.test); /* constant = 0: "if 0" @@ -1611,15 +1610,21 @@ compiler_if(struct compiler *c, stmt_ty s) } else if (constant == 1) { VISIT_SEQ(c, stmt, s->v.If.body); } else { + if (s->v.If.orelse) { + next = compiler_new_block(c); + if (next == NULL) + return 0; + } + else + next = end; VISIT(c, expr, s->v.If.test); - ADDOP_JREL(c, JUMP_IF_FALSE, next); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, next); VISIT_SEQ(c, stmt, s->v.If.body); ADDOP_JREL(c, JUMP_FORWARD, end); - compiler_use_next_block(c, next); - ADDOP(c, POP_TOP); - if (s->v.If.orelse) + if (s->v.If.orelse) { + compiler_use_next_block(c, next); VISIT_SEQ(c, stmt, s->v.If.orelse); + } } compiler_use_next_block(c, end); return 1; @@ -1693,8 +1698,7 @@ compiler_while(struct compiler *c, stmt_ty s) so we need to set an extra line number. */ c->u->u_lineno_set = false; VISIT(c, expr, s->v.While.test); - ADDOP_JREL(c, JUMP_IF_FALSE, anchor); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, anchor); } VISIT_SEQ(c, stmt, s->v.While.body); ADDOP_JABS(c, JUMP_ABSOLUTE, loop); @@ -1705,7 +1709,6 @@ compiler_while(struct compiler *c, stmt_ty s) if (constant == -1) { compiler_use_next_block(c, anchor); - ADDOP(c, POP_TOP); ADDOP(c, POP_BLOCK); } compiler_pop_fblock(c, LOOP, loop); @@ -1826,20 +1829,17 @@ compiler_try_finally(struct compiler *c, stmt_ty s) [tb, val, exc] L1: DUP ) [tb, val, exc, exc] <evaluate E1> ) [tb, val, exc, exc, E1] COMPARE_OP EXC_MATCH ) only if E1 - [tb, val, exc, 1-or-0] JUMP_IF_FALSE L2 ) - [tb, val, exc, 1] POP ) + [tb, val, exc, 1-or-0] POP_JUMP_IF_FALSE L2 ) [tb, val, exc] POP [tb, val] <assign to V1> (or POP if no V1) [tb] POP [] <code for S1> JUMP_FORWARD L0 - [tb, val, exc, 0] L2: POP - [tb, val, exc] DUP + [tb, val, exc] L2: DUP .............................etc....................... - [tb, val, exc, 0] Ln+1: POP - [tb, val, exc] END_FINALLY # re-raise exception + [tb, val, exc] Ln+1: END_FINALLY # re-raise exception [] L0: <next statement> @@ -1881,8 +1881,7 @@ compiler_try_except(struct compiler *c, stmt_ty s) ADDOP(c, DUP_TOP); VISIT(c, expr, handler->v.ExceptHandler.type); ADDOP_I(c, COMPARE_OP, PyCmp_EXC_MATCH); - ADDOP_JREL(c, JUMP_IF_FALSE, except); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, except); } ADDOP(c, POP_TOP); if (handler->v.ExceptHandler.name) { @@ -1895,8 +1894,6 @@ compiler_try_except(struct compiler *c, stmt_ty s) VISIT_SEQ(c, stmt, handler->v.ExceptHandler.body); ADDOP_JREL(c, JUMP_FORWARD, end); compiler_use_next_block(c, except); - if (handler->v.ExceptHandler.type) - ADDOP(c, POP_TOP); } ADDOP(c, END_FINALLY); compiler_use_next_block(c, orelse); @@ -2084,8 +2081,7 @@ compiler_assert(struct compiler *c, stmt_ty s) end = compiler_new_block(c); if (end == NULL) return 0; - ADDOP_JREL(c, JUMP_IF_TRUE, end); - ADDOP(c, POP_TOP); + ADDOP_JABS(c, POP_JUMP_IF_TRUE, end); ADDOP_O(c, LOAD_GLOBAL, assertion_error, names); if (s->v.Assert.msg) { VISIT(c, expr, s->v.Assert.msg); @@ -2095,7 +2091,6 @@ compiler_assert(struct compiler *c, stmt_ty s) ADDOP_I(c, RAISE_VARARGS, 1); } compiler_use_next_block(c, end); - ADDOP(c, POP_TOP); return 1; } @@ -2473,9 +2468,9 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(e->kind == BoolOp_kind); if (e->v.BoolOp.op == And) - jumpi = JUMP_IF_FALSE; + jumpi = JUMP_IF_FALSE_OR_POP; else - jumpi = JUMP_IF_TRUE; + jumpi = JUMP_IF_TRUE_OR_POP; end = compiler_new_block(c); if (end == NULL) return 0; @@ -2484,8 +2479,7 @@ compiler_boolop(struct compiler *c, expr_ty e) assert(n >= 0); for (i = 0; i < n; ++i) { VISIT(c, expr, (expr_ty)asdl_seq_GET(s, i)); - ADDOP_JREL(c, jumpi, end); - ADDOP(c, POP_TOP) + ADDOP_JABS(c, jumpi, end); } VISIT(c, expr, (expr_ty)asdl_seq_GET(s, n)); compiler_use_next_block(c, end); @@ -2543,9 +2537,8 @@ compiler_compare(struct compiler *c, expr_ty e) ADDOP_I(c, COMPARE_OP, cmpop((cmpop_ty)(asdl_seq_GET( e->v.Compare.ops, i - 1)))); - ADDOP_JREL(c, JUMP_IF_FALSE, cleanup); + ADDOP_JABS(c, JUMP_IF_FALSE_OR_POP, cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); if (i < (n - 1)) VISIT(c, expr, (expr_ty)asdl_seq_GET(e->v.Compare.comparators, i)); @@ -2636,9 +2629,8 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators, for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(l->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); } if (++gen_index < asdl_seq_LEN(generators)) @@ -2652,12 +2644,7 @@ compiler_listcomp_generator(struct compiler *c, asdl_seq *generators, compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - ADDOP(c, POP_TOP); - } + compiler_use_next_block(c, if_cleanup); ADDOP_JABS(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); @@ -2720,10 +2707,9 @@ compiler_genexp_generator(struct compiler *c, for (i = 0; i < n; i++) { expr_ty e = (expr_ty)asdl_seq_GET(ge->ifs, i); VISIT(c, expr, e); - ADDOP_JREL(c, JUMP_IF_FALSE, if_cleanup); + ADDOP_JABS(c, POP_JUMP_IF_FALSE, if_cleanup); NEXT_BLOCK(c); - ADDOP(c, POP_TOP); - } + } if (++gen_index < asdl_seq_LEN(generators)) if (!compiler_genexp_generator(c, generators, gen_index, elt)) @@ -2737,13 +2723,7 @@ compiler_genexp_generator(struct compiler *c, compiler_use_next_block(c, skip); } - for (i = 0; i < n; i++) { - ADDOP_I(c, JUMP_FORWARD, 1); - if (i == 0) - compiler_use_next_block(c, if_cleanup); - - ADDOP(c, POP_TOP); - } + compiler_use_next_block(c, if_cleanup); ADDOP_JABS(c, JUMP_ABSOLUTE, start); compiler_use_next_block(c, anchor); ADDOP(c, POP_BLOCK); diff --git a/Python/import.c b/Python/import.c index b00986c697..98b8dceba4 100644 --- a/Python/import.c +++ b/Python/import.c @@ -72,9 +72,11 @@ typedef unsigned short mode_t; Python 2.6a0: 62151 (peephole optimizations and STORE_MAP opcode) Python 2.6a1: 62161 (WITH_CLEANUP optimization) Python 2.7a0: 62171 (optimize list comprehensions/change LIST_APPEND) + Python 2.7a0: 62181 (optimize conditional branches: + introduce POP_JUMP_IF_FALSE and POP_JUMP_IF_TRUE) . */ -#define MAGIC (62171 | ((long)'\r'<<16) | ((long)'\n'<<24)) +#define MAGIC (62181 | ((long)'\r'<<16) | ((long)'\n'<<24)) /* Magic word as global; note that _PyImport_Init() can change the value of this global to accommodate for alterations of how the diff --git a/Python/peephole.c b/Python/peephole.c index e9da377dc6..a3dda9c8aa 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -13,7 +13,12 @@ #define GETARG(arr, i) ((int)((arr[i+2]<<8) + arr[i+1])) #define UNCONDITIONAL_JUMP(op) (op==JUMP_ABSOLUTE || op==JUMP_FORWARD) -#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP) +#define CONDITIONAL_JUMP(op) (op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define ABSOLUTE_JUMP(op) (op==JUMP_ABSOLUTE || op==CONTINUE_LOOP \ + || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \ + || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP) +#define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) #define GETJUMPTGT(arr, i) (GETARG(arr,i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+3)) #define SETARG(arr, i, val) arr[i+2] = val>>8; arr[i+1] = val & 255 #define CODESIZE(op) (HAS_ARG(op) ? 3 : 1) @@ -245,8 +250,10 @@ markblocks(unsigned char *code, Py_ssize_t len) switch (opcode) { case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case JUMP_ABSOLUTE: case CONTINUE_LOOP: case SETUP_LOOP: @@ -338,29 +345,24 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, assert(PyList_Check(consts)); for (i=0 ; i<codelen ; i += CODESIZE(codestr[i])) { + reoptimize_current: opcode = codestr[i]; lastlc = cumlc; cumlc = 0; switch (opcode) { - - /* Replace UNARY_NOT JUMP_IF_FALSE POP_TOP with - with JUMP_IF_TRUE POP_TOP */ + /* Replace UNARY_NOT POP_JUMP_IF_FALSE + with POP_JUMP_IF_TRUE */ case UNARY_NOT: - if (codestr[i+1] != JUMP_IF_FALSE || - codestr[i+4] != POP_TOP || - !ISBASICBLOCK(blocks,i,5)) - continue; - tgt = GETJUMPTGT(codestr, (i+1)); - if (codestr[tgt] != POP_TOP) + if (codestr[i+1] != POP_JUMP_IF_FALSE + || !ISBASICBLOCK(blocks,i,4)) continue; - j = GETARG(codestr, i+1) + 1; - codestr[i] = JUMP_IF_TRUE; + j = GETARG(codestr, i+1); + codestr[i] = POP_JUMP_IF_TRUE; SETARG(codestr, i, j); - codestr[i+3] = POP_TOP; - codestr[i+4] = NOP; - break; + codestr[i+3] = NOP; + goto reoptimize_current; /* not a is b --> a is not b not a in b --> a not in b @@ -400,16 +402,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, break; /* Skip over LOAD_CONST trueconst - JUMP_IF_FALSE xx POP_TOP */ + POP_JUMP_IF_FALSE xx. This improves + "while 1" performance. */ case LOAD_CONST: cumlc = lastlc + 1; j = GETARG(codestr, i); - if (codestr[i+3] != JUMP_IF_FALSE || - codestr[i+6] != POP_TOP || - !ISBASICBLOCK(blocks,i,7) || + if (codestr[i+3] != POP_JUMP_IF_FALSE || + !ISBASICBLOCK(blocks,i,6) || !PyObject_IsTrue(PyList_GET_ITEM(consts, j))) continue; - memset(codestr+i, NOP, 7); + memset(codestr+i, NOP, 6); cumlc = 0; break; @@ -498,27 +500,49 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, "if a or b:" "a and b or c" "(a and b) and c" - x:JUMP_IF_FALSE y y:JUMP_IF_FALSE z --> x:JUMP_IF_FALSE z - x:JUMP_IF_FALSE y y:JUMP_IF_TRUE z --> x:JUMP_IF_FALSE y+3 + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_FALSE_OR_POP z + --> x:JUMP_IF_FALSE_OR_POP z + x:JUMP_IF_FALSE_OR_POP y y:JUMP_IF_TRUE_OR_POP z + --> x:POP_JUMP_IF_FALSE y+3 where y+3 is the instruction following the second test. */ - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: tgt = GETJUMPTGT(codestr, i); j = codestr[tgt]; - if (j == JUMP_IF_FALSE || j == JUMP_IF_TRUE) { - if (j == opcode) { - tgttgt = GETJUMPTGT(codestr, tgt) - i - 3; + if (CONDITIONAL_JUMP(j)) { + /* NOTE: all possible jumps here are + absolute! */ + if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { + /* The second jump will be + taken iff the first is. */ + tgttgt = GETJUMPTGT(codestr, tgt); + /* The current opcode inherits + its target's stack behaviour */ + codestr[i] = j; SETARG(codestr, i, tgttgt); + goto reoptimize_current; } else { - tgt -= i; - SETARG(codestr, i, tgt); + /* The second jump is not taken + if the first is (so jump past + it), and all conditional + jumps pop their argument when + they're not taken (so change + the first jump to pop its + argument when it's taken). */ + if (JUMPS_ON_TRUE(opcode)) + codestr[i] = POP_JUMP_IF_TRUE; + else + codestr[i] = POP_JUMP_IF_FALSE; + SETARG(codestr, i, (tgt + 3)); + goto reoptimize_current; } - break; } - /* Intentional fallthrough */ + /* Intentional fallthrough */ /* Replace jumps to unconditional jumps */ + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: case FOR_ITER: case JUMP_FORWARD: case JUMP_ABSOLUTE: @@ -591,14 +615,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names, case JUMP_ABSOLUTE: case CONTINUE_LOOP: + case POP_JUMP_IF_FALSE: + case POP_JUMP_IF_TRUE: + case JUMP_IF_FALSE_OR_POP: + case JUMP_IF_TRUE_OR_POP: j = addrmap[GETARG(codestr, i)]; SETARG(codestr, i, j); break; case FOR_ITER: case JUMP_FORWARD: - case JUMP_IF_FALSE: - case JUMP_IF_TRUE: case SETUP_LOOP: case SETUP_EXCEPT: case SETUP_FINALLY: |