diff options
Diffstat (limited to 'Python/compile.c')
-rw-r--r-- | Python/compile.c | 313 |
1 files changed, 112 insertions, 201 deletions
diff --git a/Python/compile.c b/Python/compile.c index ffde903d65..1e720eab0d 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -29,7 +29,6 @@ #include "code.h" #include "symtable.h" #include "opcode.h" -#include "wordcode_helpers.h" #define DEFAULT_BLOCK_SIZE 16 #define DEFAULT_BLOCKS 8 @@ -44,6 +43,7 @@ struct instr { unsigned i_jabs : 1; unsigned i_jrel : 1; + unsigned i_hasarg : 1; unsigned char i_opcode; int i_oparg; struct basicblock_ *i_target; /* target block (if jump instruction) */ @@ -171,6 +171,7 @@ static int compiler_addop(struct compiler *, int); static int compiler_addop_o(struct compiler *, int, PyObject *, PyObject *); static int compiler_addop_i(struct compiler *, int, Py_ssize_t); static int compiler_addop_j(struct compiler *, int, basicblock *, int); +static basicblock *compiler_use_new_block(struct compiler *); static int compiler_error(struct compiler *, const char *); static int compiler_nameop(struct compiler *, identifier, expr_context_ty); @@ -195,7 +196,7 @@ static int expr_constant(struct compiler *, expr_ty); static int compiler_with(struct compiler *, stmt_ty, int); static int compiler_async_with(struct compiler *, stmt_ty, int); static int compiler_async_for(struct compiler *, stmt_ty); -static int compiler_call_helper(struct compiler *c, int n, +static int compiler_call_helper(struct compiler *c, Py_ssize_t n, asdl_seq *args, asdl_seq *keywords); static int compiler_try_except(struct compiler *, stmt_ty); @@ -476,9 +477,9 @@ compiler_unit_check(struct compiler_unit *u) { basicblock *block; for (block = u->u_blocks; block != NULL; block = block->b_list) { - assert((Py_uintptr_t)block != 0xcbcbcbcbU); - assert((Py_uintptr_t)block != 0xfbfbfbfbU); - assert((Py_uintptr_t)block != 0xdbdbdbdbU); + assert((void *)block != (void *)0xcbcbcbcb); + assert((void *)block != (void *)0xfbfbfbfb); + assert((void *)block != (void *)0xdbdbdbdb); if (block->b_instr != NULL) { assert(block->b_ialloc > 0); assert(block->b_iused > 0); @@ -522,7 +523,6 @@ compiler_enter_scope(struct compiler *c, identifier name, int scope_type, void *key, int lineno) { struct compiler_unit *u; - basicblock *block; u = (struct compiler_unit *)PyObject_Malloc(sizeof( struct compiler_unit)); @@ -620,11 +620,8 @@ compiler_enter_scope(struct compiler *c, identifier name, c->u = u; c->c_nestlevel++; - - block = compiler_new_block(c); - if (block == NULL) + if (compiler_use_new_block(c) == NULL) return 0; - c->u->u_curblock = block; if (u->u_scope_type != COMPILER_SCOPE_MODULE) { if (!compiler_set_qualname(c)) @@ -734,7 +731,6 @@ compiler_set_qualname(struct compiler *c) return 1; } - /* Allocate a new block and return a pointer to it. Returns NULL on error. */ @@ -759,6 +755,16 @@ compiler_new_block(struct compiler *c) } static basicblock * +compiler_use_new_block(struct compiler *c) +{ + basicblock *block = compiler_new_block(c); + if (block == NULL) + return NULL; + c->u->u_curblock = block; + return block; +} + +static basicblock * compiler_next_block(struct compiler *c) { basicblock *block = compiler_new_block(c); @@ -1060,10 +1066,6 @@ PyCompile_OpcodeStackEffect(int opcode, int oparg) return 1; case GET_YIELD_FROM_ITER: return 0; - case FORMAT_VALUE: - /* If there's a fmt_spec on the stack, we go from 2->1, - else 1->1. */ - return (oparg & FVS_MASK) == FVS_HAVE_SPEC ? -1 : 0; default: return PY_INVALID_STACK_EFFECT; } @@ -1080,14 +1082,13 @@ compiler_addop(struct compiler *c, int opcode) basicblock *b; struct instr *i; int off; - assert(!HAS_ARG(opcode)); off = compiler_next_instr(c, c->u->u_curblock); if (off < 0) return 0; b = c->u->u_curblock; i = &b->b_instr[off]; i->i_opcode = opcode; - i->i_oparg = 0; + i->i_hasarg = 0; if (opcode == RETURN_VALUE) b->b_return = 1; compiler_set_lineno(c, off); @@ -1164,15 +1165,10 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) struct instr *i; int off; - /* oparg value is unsigned, but a signed C int is usually used to store - it in the C code (like Python/ceval.c). - - Limit to 32-bit signed C int (rather than INT_MAX) for portability. - - The argument of a concrete bytecode instruction is limited to 8-bit. - EXTENDED_ARG is used for 16, 24, and 32-bit arguments. */ - assert(HAS_ARG(opcode)); - assert(0 <= oparg && oparg <= 2147483647); + /* Integer arguments are limit to 16-bit. There is an extension for 32-bit + integer arguments. */ + assert((-2147483647-1) <= oparg); + assert(oparg <= 2147483647); off = compiler_next_instr(c, c->u->u_curblock); if (off < 0) @@ -1180,6 +1176,7 @@ compiler_addop_i(struct compiler *c, int opcode, Py_ssize_t oparg) i = &c->u->u_curblock->b_instr[off]; i->i_opcode = opcode; i->i_oparg = Py_SAFE_DOWNCAST(oparg, Py_ssize_t, int); + i->i_hasarg = 1; compiler_set_lineno(c, off); return 1; } @@ -1190,7 +1187,6 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) struct instr *i; int off; - assert(HAS_ARG(opcode)); assert(b != NULL); off = compiler_next_instr(c, c->u->u_curblock); if (off < 0) @@ -1198,6 +1194,7 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) i = &c->u->u_curblock->b_instr[off]; i->i_opcode = opcode; i->i_target = b; + i->i_hasarg = 1; if (absolute) i->i_jabs = 1; else @@ -1206,12 +1203,22 @@ compiler_addop_j(struct compiler *c, int opcode, basicblock *b, int absolute) return 1; } -/* NEXT_BLOCK() creates an implicit jump from the current block - to the new block. +/* The distinction between NEW_BLOCK and NEXT_BLOCK is subtle. (I'd + like to find better names.) NEW_BLOCK() creates a new block and sets + it as the current block. NEXT_BLOCK() also creates an implicit jump + from the current block to the new block. +*/ - The returns inside this macro make it impossible to decref objects - created in the local function. Local objects should use the arena. +/* The returns inside these macros make it impossible to decref objects + created in the local function. Local objects should use the arena. */ + + +#define NEW_BLOCK(C) { \ + if (compiler_use_new_block((C)) == NULL) \ + return 0; \ +} + #define NEXT_BLOCK(C) { \ if (compiler_next_block((C)) == NULL) \ return 0; \ @@ -1302,11 +1309,7 @@ compiler_isdocstring(stmt_ty s) { if (s->kind != Expr_kind) return 0; - if (s->v.Expr.value->kind == Str_kind) - return 1; - if (s->v.Expr.value->kind == Constant_kind) - return PyUnicode_CheckExact(s->v.Expr.value->v.Constant.value); - return 0; + return s->v.Expr.value->kind == Str_kind; } /* Compile a sequence of statements, checking for a docstring. */ @@ -1680,12 +1683,8 @@ compiler_function(struct compiler *c, stmt_ty s, int is_async) st = (stmt_ty)asdl_seq_GET(body, 0); docstring = compiler_isdocstring(st); - if (docstring && c->c_optimize < 2) { - if (st->v.Expr.value->kind == Constant_kind) - first_const = st->v.Expr.value->v.Constant.value; - else - first_const = st->v.Expr.value->v.Str.s; - } + if (docstring && c->c_optimize < 2) + first_const = st->v.Expr.value->v.Str.s; if (compiler_add_o(c, c->u->u_consts, first_const) < 0) { compiler_exit_scope(c); return 0; @@ -2596,35 +2595,6 @@ compiler_assert(struct compiler *c, stmt_ty s) } static int -compiler_visit_stmt_expr(struct compiler *c, expr_ty value) -{ - if (c->c_interactive && c->c_nestlevel <= 1) { - VISIT(c, expr, value); - ADDOP(c, PRINT_EXPR); - return 1; - } - - switch (value->kind) - { - case Str_kind: - case Num_kind: - case Ellipsis_kind: - case Bytes_kind: - case NameConstant_kind: - case Constant_kind: - /* ignore constant statement */ - return 1; - - default: - break; - } - - VISIT(c, expr, value); - ADDOP(c, POP_TOP); - return 1; -} - -static int compiler_visit_stmt(struct compiler *c, stmt_ty s) { Py_ssize_t i, n; @@ -2694,7 +2664,16 @@ compiler_visit_stmt(struct compiler *c, stmt_ty s) case Nonlocal_kind: break; case Expr_kind: - return compiler_visit_stmt_expr(c, s->v.Expr.value); + if (c->c_interactive && c->c_nestlevel <= 1) { + VISIT(c, expr, s->v.Expr.value); + ADDOP(c, PRINT_EXPR); + } + else if (s->v.Expr.value->kind != Str_kind && + s->v.Expr.value->kind != Num_kind) { + VISIT(c, expr, s->v.Expr.value); + ADDOP(c, POP_TOP); + } + break; case Pass_kind: break; case Break_kind: @@ -3098,8 +3077,7 @@ compiler_set(struct compiler *c, expr_ty e) static int compiler_dict(struct compiler *c, expr_ty e) { - Py_ssize_t i, n, elements; - int containers; + Py_ssize_t i, n, containers, elements; int is_unpacking = 0; n = asdl_seq_LEN(e->v.Dict.values); containers = 0; @@ -3191,91 +3169,15 @@ compiler_call(struct compiler *c, expr_ty e) e->v.Call.keywords); } -static int -compiler_joined_str(struct compiler *c, expr_ty e) -{ - /* Concatenate parts of a string using ''.join(parts). There are - probably better ways of doing this. - - This is used for constructs like "'x=' f'{42}'", which have to - be evaluated at compile time. */ - - static PyObject *empty_string; - static PyObject *join_string; - - if (!empty_string) { - empty_string = PyUnicode_FromString(""); - if (!empty_string) - return 0; - } - if (!join_string) { - join_string = PyUnicode_FromString("join"); - if (!join_string) - return 0; - } - - ADDOP_O(c, LOAD_CONST, empty_string, consts); - ADDOP_NAME(c, LOAD_ATTR, join_string, names); - VISIT_SEQ(c, expr, e->v.JoinedStr.values); - ADDOP_I(c, BUILD_LIST, asdl_seq_LEN(e->v.JoinedStr.values)); - ADDOP_I(c, CALL_FUNCTION, 1); - return 1; -} - -/* Used to implement f-strings. Format a single value. */ -static int -compiler_formatted_value(struct compiler *c, expr_ty e) -{ - /* Our oparg encodes 2 pieces of information: the conversion - character, and whether or not a format_spec was provided. - - Convert the conversion char to 2 bits: - None: 000 0x0 FVC_NONE - !s : 001 0x1 FVC_STR - !r : 010 0x2 FVC_REPR - !a : 011 0x3 FVC_ASCII - - next bit is whether or not we have a format spec: - yes : 100 0x4 - no : 000 0x0 - */ - - int oparg; - - /* Evaluate the expression to be formatted. */ - VISIT(c, expr, e->v.FormattedValue.value); - - switch (e->v.FormattedValue.conversion) { - case 's': oparg = FVC_STR; break; - case 'r': oparg = FVC_REPR; break; - case 'a': oparg = FVC_ASCII; break; - case -1: oparg = FVC_NONE; break; - default: - PyErr_SetString(PyExc_SystemError, - "Unrecognized conversion character"); - return 0; - } - if (e->v.FormattedValue.format_spec) { - /* Evaluate the format spec, and update our opcode arg. */ - VISIT(c, expr, e->v.FormattedValue.format_spec); - oparg |= FVS_HAVE_SPEC; - } - - /* And push our opcode and oparg */ - ADDOP_I(c, FORMAT_VALUE, oparg); - return 1; -} - /* shared code between compiler_call and compiler_class */ static int compiler_call_helper(struct compiler *c, - int n, /* Args already pushed */ + Py_ssize_t n, /* Args already pushed */ asdl_seq *args, asdl_seq *keywords) { int code = 0; - Py_ssize_t nelts, i, nseen; - int nkw; + Py_ssize_t nelts, i, nseen, nkw; /* the number of tuples and dictionaries on the stack */ Py_ssize_t nsubargs = 0, nsubkwargs = 0; @@ -3643,8 +3545,6 @@ expr_constant(struct compiler *c, expr_ty e) switch (e->kind) { case Ellipsis_kind: return 1; - case Constant_kind: - return PyObject_IsTrue(e->v.Constant.value); case Num_kind: return PyObject_IsTrue(e->v.Num.n); case Str_kind: @@ -3688,9 +3588,9 @@ expr_constant(struct compiler *c, expr_ty e) BLOCK finally: if an exception was raised: - exc = copy of (exception, instance, traceback) + exc = copy of (exception, instance, traceback) else: - exc = (None, None, None) + exc = (None, None, None) if not (await exit(*exc)): raise */ @@ -3932,19 +3832,12 @@ compiler_visit_expr(struct compiler *c, expr_ty e) return compiler_compare(c, e); case Call_kind: return compiler_call(c, e); - case Constant_kind: - ADDOP_O(c, LOAD_CONST, e->v.Constant.value, consts); - break; case Num_kind: ADDOP_O(c, LOAD_CONST, e->v.Num.n, consts); break; case Str_kind: ADDOP_O(c, LOAD_CONST, e->v.Str.s, consts); break; - case JoinedStr_kind: - return compiler_joined_str(c, e); - case FormattedValue_kind: - return compiler_formatted_value(c, e); case Bytes_kind: ADDOP_O(c, LOAD_CONST, e->v.Bytes.s, consts); break; @@ -4398,6 +4291,18 @@ assemble_free(struct assembler *a) PyObject_Free(a->a_postorder); } +/* Return the size of a basic block in bytes. */ + +static int +instrsize(struct instr *instr) +{ + if (!instr->i_hasarg) + return 1; /* 1 byte for the opcode*/ + if (instr->i_oparg > 0xffff) + return 6; /* 1 (opcode) + 1 (EXTENDED_ARG opcode) + 2 (oparg) + 2(oparg extended) */ + return 3; /* 1 (opcode) + 2 (oparg) */ +} + static int blocksize(basicblock *b) { @@ -4405,7 +4310,7 @@ blocksize(basicblock *b) int size = 0; for (i = 0; i < b->b_iused; i++) - size += instrsize(b->b_instr[i].i_oparg); + size += instrsize(&b->b_instr[i]); return size; } @@ -4424,6 +4329,7 @@ assemble_lnotab(struct assembler *a, struct instr *i) d_lineno = i->i_lineno - a->a_lineno; assert(d_bytecode >= 0); + assert(d_lineno >= 0); if(d_bytecode == 0 && d_lineno == 0) return 1; @@ -4453,21 +4359,9 @@ assemble_lnotab(struct assembler *a, struct instr *i) d_bytecode -= ncodes * 255; a->a_lnotab_off += ncodes * 2; } - assert(0 <= d_bytecode && d_bytecode <= 255); - - if (d_lineno < -128 || 127 < d_lineno) { - int j, nbytes, ncodes, k; - if (d_lineno < 0) { - k = -128; - /* use division on positive numbers */ - ncodes = (-d_lineno) / 128; - } - else { - k = 127; - ncodes = d_lineno / 127; - } - d_lineno -= ncodes * k; - assert(ncodes >= 1); + assert(d_bytecode <= 255); + if (d_lineno > 255) { + int j, nbytes, ncodes = d_lineno / 255; nbytes = a->a_lnotab_off + 2 * ncodes; len = PyBytes_GET_SIZE(a->a_lnotab); if (nbytes >= len) { @@ -4485,15 +4379,15 @@ assemble_lnotab(struct assembler *a, struct instr *i) lnotab = (unsigned char *) PyBytes_AS_STRING(a->a_lnotab) + a->a_lnotab_off; *lnotab++ = d_bytecode; - *lnotab++ = k; + *lnotab++ = 255; d_bytecode = 0; for (j = 1; j < ncodes; j++) { *lnotab++ = 0; - *lnotab++ = k; + *lnotab++ = 255; } + d_lineno -= ncodes * 255; a->a_lnotab_off += ncodes * 2; } - assert(-128 <= d_lineno && d_lineno <= 127); len = PyBytes_GET_SIZE(a->a_lnotab); if (a->a_lnotab_off + 2 >= len) { @@ -4525,12 +4419,15 @@ assemble_lnotab(struct assembler *a, struct instr *i) static int assemble_emit(struct assembler *a, struct instr *i) { - int size, arg = 0; + int size, arg = 0, ext = 0; Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); char *code; - arg = i->i_oparg; - size = instrsize(arg); + size = instrsize(i); + if (i->i_hasarg) { + arg = i->i_oparg; + ext = arg >> 16; + } if (i->i_lineno && !assemble_lnotab(a, i)) return 0; if (a->a_offset + size >= len) { @@ -4541,7 +4438,19 @@ assemble_emit(struct assembler *a, struct instr *i) } code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; a->a_offset += size; - write_op_arg((unsigned char*)code, i->i_opcode, arg, size); + if (size == 6) { + assert(i->i_hasarg); + *code++ = (char)EXTENDED_ARG; + *code++ = ext & 0xff; + *code++ = ext >> 8; + arg &= 0xffff; + } + *code++ = i->i_opcode; + if (i->i_hasarg) { + assert(size == 3 || size == 6); + *code++ = arg & 0xff; + *code++ = arg >> 8; + } return 1; } @@ -4549,7 +4458,7 @@ static void assemble_jump_offsets(struct assembler *a, struct compiler *c) { basicblock *b; - int bsize, totsize, extended_arg_recompile; + int bsize, totsize, extended_arg_count = 0, last_extended_arg_count; int i; /* Compute the size of each block and fixup jump args. @@ -4562,26 +4471,27 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) b->b_offset = totsize; totsize += bsize; } - extended_arg_recompile = 0; + last_extended_arg_count = extended_arg_count; + extended_arg_count = 0; for (b = c->u->u_blocks; b != NULL; b = b->b_list) { bsize = b->b_offset; for (i = 0; i < b->b_iused; i++) { struct instr *instr = &b->b_instr[i]; - int isize = instrsize(instr->i_oparg); /* Relative jumps are computed relative to the instruction pointer after fetching the jump instruction. */ - bsize += isize; - if (instr->i_jabs || instr->i_jrel) { + bsize += instrsize(instr); + if (instr->i_jabs) instr->i_oparg = instr->i_target->b_offset; - if (instr->i_jrel) { - instr->i_oparg -= bsize; - } - if (instrsize(instr->i_oparg) != isize) { - extended_arg_recompile = 1; - } + else if (instr->i_jrel) { + int delta = instr->i_target->b_offset - bsize; + instr->i_oparg = delta; } + else + continue; + if (instr->i_oparg > 0xffff) + extended_arg_count++; } } @@ -4591,7 +4501,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) The issue is that in the first loop blocksize() is called which calls instrsize() which requires i_oparg be set - appropriately. There is a bootstrap problem because + appropriately. There is a bootstrap problem because i_oparg is calculated in the second loop above. So we loop until we stop seeing new EXTENDED_ARGs. @@ -4599,7 +4509,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c) ones in jump instructions. So this should converge fairly quickly. */ - } while (extended_arg_recompile); + } while (last_extended_arg_count != extended_arg_count); } static PyObject * @@ -4745,9 +4655,9 @@ dump_instr(const struct instr *i) char arg[128]; *arg = '\0'; - if (HAS_ARG(i->i_opcode)) { + if (i->i_hasarg) sprintf(arg, "arg: %d ", i->i_oparg); - } + fprintf(stderr, "line: %d, opcode: %d %s%s%s\n", i->i_lineno, i->i_opcode, arg, jabs, jrel); } @@ -4835,3 +4745,4 @@ PyAST_Compile(mod_ty mod, const char *filename, PyCompilerFlags *flags, { return PyAST_CompileEx(mod, filename, flags, -1, arena); } + |