diff options
| -rw-r--r-- | Include/code.h | 10 | ||||
| -rw-r--r-- | Objects/frameobject.c | 4 | ||||
| -rw-r--r-- | Objects/genobject.c | 4 | ||||
| -rw-r--r-- | Python/ceval.c | 48 | ||||
| -rw-r--r-- | Python/compile.c | 13 | ||||
| -rw-r--r-- | Python/peephole.c | 217 | ||||
| -rw-r--r-- | Python/wordcode_helpers.h | 35 | 
7 files changed, 173 insertions, 158 deletions
| diff --git a/Include/code.h b/Include/code.h index 9823f10d4e..061902911d 100644 --- a/Include/code.h +++ b/Include/code.h @@ -7,6 +7,16 @@  extern "C" {  #endif +typedef uint16_t _Py_CODEUNIT; + +#ifdef WORDS_BIGENDIAN +#  define _Py_OPCODE(word) ((word) >> 8) +#  define _Py_OPARG(word) ((word) & 255) +#else +#  define _Py_OPCODE(word) ((word) & 255) +#  define _Py_OPARG(word) ((word) >> 8) +#endif +  /* Bytecode object */  typedef struct {      PyObject_HEAD diff --git a/Objects/frameobject.c b/Objects/frameobject.c index b115614419..62f9f34c8e 100644 --- a/Objects/frameobject.c +++ b/Objects/frameobject.c @@ -189,7 +189,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)      memset(blockstack, '\0', sizeof(blockstack));      memset(in_finally, '\0', sizeof(in_finally));      blockstack_top = 0; -    for (addr = 0; addr < code_len; addr += 2) { +    for (addr = 0; addr < code_len; addr += sizeof(_Py_CODEUNIT)) {          unsigned char op = code[addr];          switch (op) {          case SETUP_LOOP: @@ -273,7 +273,7 @@ frame_setlineno(PyFrameObject *f, PyObject* p_new_lineno)       * can tell whether the jump goes into any blocks without coming out       * again - in that case we raise an exception below. */      delta_iblock = 0; -    for (addr = min_addr; addr < max_addr; addr += 2) { +    for (addr = min_addr; addr < max_addr; addr += sizeof(_Py_CODEUNIT)) {          unsigned char op = code[addr];          switch (op) {          case SETUP_LOOP: diff --git a/Objects/genobject.c b/Objects/genobject.c index bc5309a82a..7a1e9fd6ab 100644 --- a/Objects/genobject.c +++ b/Objects/genobject.c @@ -390,7 +390,7 @@ _PyGen_yf(PyGenObject *gen)          PyObject *bytecode = f->f_code->co_code;          unsigned char *code = (unsigned char *)PyBytes_AS_STRING(bytecode); -        if (code[f->f_lasti + 2] != YIELD_FROM) +        if (code[f->f_lasti + sizeof(_Py_CODEUNIT)] != YIELD_FROM)              return NULL;          yf = f->f_stacktop[-1];          Py_INCREF(yf); @@ -498,7 +498,7 @@ _gen_throw(PyGenObject *gen, int close_on_genexit,              assert(ret == yf);              Py_DECREF(ret);              /* Termination repetition of YIELD_FROM */ -            gen->gi_frame->f_lasti += 2; +            gen->gi_frame->f_lasti += sizeof(_Py_CODEUNIT);              if (_PyGen_FetchStopIterationValue(&val) == 0) {                  ret = gen_send_ex(gen, val, 0, 0);                  Py_DECREF(val); diff --git a/Python/ceval.c b/Python/ceval.c index a396e81ef7..11ad1fed26 100644 --- a/Python/ceval.c +++ b/Python/ceval.c @@ -62,7 +62,7 @@ static int import_all_from(PyObject *, PyObject *);  static void format_exc_check_arg(PyObject *, const char *, PyObject *);  static void format_exc_unbound(PyCodeObject *co, int oparg);  static PyObject * unicode_concatenate(PyObject *, PyObject *, -                                      PyFrameObject *, const unsigned short *); +                                      PyFrameObject *, const _Py_CODEUNIT *);  static PyObject * special_lookup(PyObject *, _Py_Identifier *);  #define NAME_ERROR_MSG \ @@ -725,7 +725,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)      int lastopcode = 0;  #endif      PyObject **stack_pointer;  /* Next free slot in value stack */ -    const unsigned short *next_instr; +    const _Py_CODEUNIT *next_instr;      int opcode;        /* Current opcode */      int oparg;         /* Current opcode argument, if any */      enum why_code why; /* Reason for block stack unwind */ @@ -743,7 +743,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)         time it is tested. */      int instr_ub = -1, instr_lb = 0, instr_prev = -1; -    const unsigned short *first_instr; +    const _Py_CODEUNIT *first_instr;      PyObject *names;      PyObject *consts; @@ -864,23 +864,16 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)  /* Code access macros */ -#ifdef WORDS_BIGENDIAN -    #define OPCODE(word) ((word) >> 8) -    #define OPARG(word) ((word) & 255) -#else -    #define OPCODE(word) ((word) & 255) -    #define OPARG(word) ((word) >> 8) -#endif  /* The integer overflow is checked by an assertion below. */ -#define INSTR_OFFSET()  (2*(int)(next_instr - first_instr)) +#define INSTR_OFFSET()  (sizeof(_Py_CODEUNIT) * (int)(next_instr - first_instr))  #define NEXTOPARG()  do { \ -        unsigned short word = *next_instr; \ -        opcode = OPCODE(word); \ -        oparg = OPARG(word); \ +        _Py_CODEUNIT word = *next_instr; \ +        opcode = _Py_OPCODE(word); \ +        oparg = _Py_OPARG(word); \          next_instr++; \      } while (0) -#define JUMPTO(x)       (next_instr = first_instr + (x)/2) -#define JUMPBY(x)       (next_instr += (x)/2) +#define JUMPTO(x)       (next_instr = first_instr + (x) / sizeof(_Py_CODEUNIT)) +#define JUMPBY(x)       (next_instr += (x) / sizeof(_Py_CODEUNIT))  /* OpCode prediction macros      Some opcodes tend to come in pairs thus making it possible to @@ -913,10 +906,10 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)  #else  #define PREDICT(op) \      do{ \ -        unsigned short word = *next_instr; \ -        opcode = OPCODE(word); \ +        _Py_CODEUNIT word = *next_instr; \ +        opcode = _Py_OPCODE(word); \          if (opcode == op){ \ -            oparg = OPARG(word); \ +            oparg = _Py_OPARG(word); \              next_instr++; \              goto PRED_##op; \          } \ @@ -1056,9 +1049,9 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)      freevars = f->f_localsplus + co->co_nlocals;      assert(PyBytes_Check(co->co_code));      assert(PyBytes_GET_SIZE(co->co_code) <= INT_MAX); -    assert(PyBytes_GET_SIZE(co->co_code) % 2 == 0); -    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(co->co_code), 2)); -    first_instr = (unsigned short*) PyBytes_AS_STRING(co->co_code); +    assert(PyBytes_GET_SIZE(co->co_code) % sizeof(_Py_CODEUNIT) == 0); +    assert(_Py_IS_ALIGNED(PyBytes_AS_STRING(co->co_code), sizeof(_Py_CODEUNIT))); +    first_instr = (_Py_CODEUNIT *) PyBytes_AS_STRING(co->co_code);      /*         f->f_lasti refers to the index of the last instruction,         unless it's -1 in which case next_instr should be first_instr. @@ -1074,10 +1067,11 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)         FOR_ITER is effectively a single opcode and f->f_lasti will point         to the beginning of the combined pair.)      */ +    assert(f->f_lasti >= -1);      next_instr = first_instr;      if (f->f_lasti >= 0) { -        assert(f->f_lasti % 2 == 0); -        next_instr += f->f_lasti/2 + 1; +        assert(f->f_lasti % sizeof(_Py_CODEUNIT) == 0); +        next_instr += f->f_lasti / sizeof(_Py_CODEUNIT) + 1;      }      stack_pointer = f->f_stacktop;      assert(stack_pointer != NULL); @@ -1125,7 +1119,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)             Py_MakePendingCalls() above. */          if (_Py_atomic_load_relaxed(&eval_breaker)) { -            if (OPCODE(*next_instr) == SETUP_FINALLY) { +            if (_Py_OPCODE(*next_instr) == SETUP_FINALLY) {                  /* Make the last opcode before                     a try: finally: block uninterruptible. */                  goto fast_next_opcode; @@ -2049,7 +2043,7 @@ _PyEval_EvalFrameDefault(PyFrameObject *f, int throwflag)              f->f_stacktop = stack_pointer;              why = WHY_YIELD;              /* and repeat... */ -            f->f_lasti -= 2; +            f->f_lasti -= sizeof(_Py_CODEUNIT);              goto fast_yield;          } @@ -5321,7 +5315,7 @@ format_exc_unbound(PyCodeObject *co, int oparg)  static PyObject *  unicode_concatenate(PyObject *v, PyObject *w, -                    PyFrameObject *f, const unsigned short *next_instr) +                    PyFrameObject *f, const _Py_CODEUNIT *next_instr)  {      PyObject *res;      if (Py_REFCNT(v) == 2) { diff --git a/Python/compile.c b/Python/compile.c index 4631dbbf18..6d64c076e1 100644 --- a/Python/compile.c +++ b/Python/compile.c @@ -4948,7 +4948,7 @@ assemble_lnotab(struct assembler *a, struct instr *i)      Py_ssize_t len;      unsigned char *lnotab; -    d_bytecode = a->a_offset - a->a_lineno_off; +    d_bytecode = (a->a_offset - a->a_lineno_off) * sizeof(_Py_CODEUNIT);      d_lineno = i->i_lineno - a->a_lineno;      assert(d_bytecode >= 0); @@ -5055,21 +5055,21 @@ assemble_emit(struct assembler *a, struct instr *i)  {      int size, arg = 0;      Py_ssize_t len = PyBytes_GET_SIZE(a->a_bytecode); -    char *code; +    _Py_CODEUNIT *code;      arg = i->i_oparg;      size = instrsize(arg);      if (i->i_lineno && !assemble_lnotab(a, i))          return 0; -    if (a->a_offset + size >= len) { +    if (a->a_offset + size >= len / (int)sizeof(_Py_CODEUNIT)) {          if (len > PY_SSIZE_T_MAX / 2)              return 0;          if (_PyBytes_Resize(&a->a_bytecode, len * 2) < 0)              return 0;      } -    code = PyBytes_AS_STRING(a->a_bytecode) + a->a_offset; +    code = (_Py_CODEUNIT *)PyBytes_AS_STRING(a->a_bytecode) + a->a_offset;      a->a_offset += size; -    write_op_arg((unsigned char*)code, i->i_opcode, arg, size); +    write_op_arg(code, i->i_opcode, arg, size);      return 1;  } @@ -5106,6 +5106,7 @@ assemble_jump_offsets(struct assembler *a, struct compiler *c)                      if (instr->i_jrel) {                          instr->i_oparg -= bsize;                      } +                    instr->i_oparg *= sizeof(_Py_CODEUNIT);                      if (instrsize(instr->i_oparg) != isize) {                          extended_arg_recompile = 1;                      } @@ -5351,7 +5352,7 @@ assemble(struct compiler *c, int addNone)      if (_PyBytes_Resize(&a.a_lnotab, a.a_lnotab_off) < 0)          goto error; -    if (_PyBytes_Resize(&a.a_bytecode, a.a_offset) < 0) +    if (_PyBytes_Resize(&a.a_bytecode, a.a_offset * sizeof(_Py_CODEUNIT)) < 0)          goto error;      co = makecode(c, &a); diff --git a/Python/peephole.c b/Python/peephole.c index b2c0279b73..68e36f7331 100644 --- a/Python/peephole.c +++ b/Python/peephole.c @@ -17,7 +17,8 @@      || op==POP_JUMP_IF_FALSE || op==POP_JUMP_IF_TRUE \      || op==JUMP_IF_FALSE_OR_POP || op==JUMP_IF_TRUE_OR_POP)  #define JUMPS_ON_TRUE(op) (op==POP_JUMP_IF_TRUE || op==JUMP_IF_TRUE_OR_POP) -#define GETJUMPTGT(arr, i) (get_arg(arr, i) + (ABSOLUTE_JUMP(arr[i]) ? 0 : i+2)) +#define GETJUMPTGT(arr, i) (get_arg(arr, i) / sizeof(_Py_CODEUNIT) + \ +        (ABSOLUTE_JUMP(_Py_OPCODE(arr[i])) ? 0 : i+1))  #define ISBASICBLOCK(blocks, start, end) \      (blocks[start]==blocks[end]) @@ -40,7 +41,7 @@  #define CONST_STACK_PUSH_OP(i) do { \      PyObject *_x; \ -    assert(codestr[i] == LOAD_CONST); \ +    assert(_Py_OPCODE(codestr[i]) == LOAD_CONST); \      assert(PyList_GET_SIZE(consts) > (Py_ssize_t)get_arg(codestr, i)); \      _x = PyList_GET_ITEM(consts, get_arg(codestr, i)); \      if (++const_stack_top >= const_stack_size) { \ @@ -72,33 +73,33 @@     Callers are responsible to check CONST_STACK_LEN beforehand.  */  static Py_ssize_t -lastn_const_start(unsigned char *codestr, Py_ssize_t i, Py_ssize_t n) +lastn_const_start(const _Py_CODEUNIT *codestr, Py_ssize_t i, Py_ssize_t n)  { -    assert(n > 0 && (i&1) == 0); +    assert(n > 0);      for (;;) { -        i -= 2; +        i--;          assert(i >= 0); -        if (codestr[i] == LOAD_CONST) { +        if (_Py_OPCODE(codestr[i]) == LOAD_CONST) {              if (!--n) { -                while (i > 0 && codestr[i-2] == EXTENDED_ARG) { -                    i -= 2; +                while (i > 0 && _Py_OPCODE(codestr[i-1]) == EXTENDED_ARG) { +                    i--;                  }                  return i;              }          }          else { -            assert(codestr[i] == NOP || codestr[i] == EXTENDED_ARG); +            assert(_Py_OPCODE(codestr[i]) == NOP || +                   _Py_OPCODE(codestr[i]) == EXTENDED_ARG);          }      }  }  /* Scans through EXTENDED ARGs, seeking the index of the effective opcode */  static Py_ssize_t -find_op(unsigned char *codestr, Py_ssize_t i) +find_op(const _Py_CODEUNIT *codestr, Py_ssize_t i)  { -    assert((i&1) == 0); -    while (codestr[i] == EXTENDED_ARG) { -        i += 2; +    while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) { +        i++;      }      return i;  } @@ -106,27 +107,34 @@ find_op(unsigned char *codestr, Py_ssize_t i)  /* Given the index of the effective opcode,     scan back to construct the oparg with EXTENDED_ARG */  static unsigned int -get_arg(unsigned char *codestr, Py_ssize_t i) +get_arg(const _Py_CODEUNIT *codestr, Py_ssize_t i)  { -    unsigned int oparg = codestr[i+1]; -    assert((i&1) == 0); -    if (i >= 2 && codestr[i-2] == EXTENDED_ARG) { -        oparg |= codestr[i-1] << 8; -        if (i >= 4 && codestr[i-4] == EXTENDED_ARG) { -            oparg |= codestr[i-3] << 16; -            if (i >= 6 && codestr[i-6] == EXTENDED_ARG) { -                oparg |= codestr[i-5] << 24; +    _Py_CODEUNIT word; +    unsigned int oparg = _Py_OPARG(codestr[i]); +    if (i >= 1 && _Py_OPCODE(word = codestr[i-1]) == EXTENDED_ARG) { +        oparg |= _Py_OPARG(word) << 8; +        if (i >= 2 && _Py_OPCODE(word = codestr[i-2]) == EXTENDED_ARG) { +            oparg |= _Py_OPARG(word) << 16; +            if (i >= 3 && _Py_OPCODE(word = codestr[i-3]) == EXTENDED_ARG) { +                oparg |= _Py_OPARG(word) << 24;              }          }      }      return oparg;  } +/* Fill the region with NOPs. */ +static void +fill_nops(_Py_CODEUNIT *codestr, Py_ssize_t start, Py_ssize_t end) +{ +    memset(codestr + start, NOP, (end - start) * sizeof(_Py_CODEUNIT)); +} +  /* Given the index of the effective opcode,     attempt to replace the argument, taking into account EXTENDED_ARG.     Returns -1 on failure, or the new op index on success */  static Py_ssize_t -set_arg(unsigned char *codestr, Py_ssize_t i, unsigned int oparg) +set_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned int oparg)  {      unsigned int curarg = get_arg(codestr, i);      int curilen, newilen; @@ -138,8 +146,8 @@ set_arg(unsigned char *codestr, Py_ssize_t i, unsigned int oparg)          return -1;      } -    write_op_arg(codestr + i + 2 - curilen, codestr[i], oparg, newilen); -    memset(codestr + i + 2 - curilen + newilen, NOP, curilen - newilen); +    write_op_arg(codestr + i + 1 - curilen, _Py_OPCODE(codestr[i]), oparg, newilen); +    fill_nops(codestr, i + 1 - curilen + newilen, i + 1);      return i-curilen+newilen;  } @@ -147,17 +155,16 @@ set_arg(unsigned char *codestr, Py_ssize_t i, unsigned int oparg)     Preceding memory in the region is overwritten with NOPs.     Returns -1 on failure, op index on success */  static Py_ssize_t -copy_op_arg(unsigned char *codestr, Py_ssize_t i, unsigned char op, +copy_op_arg(_Py_CODEUNIT *codestr, Py_ssize_t i, unsigned char op,              unsigned int oparg, Py_ssize_t maxi)  {      int ilen = instrsize(oparg); -    assert((i&1) == 0);      if (i + ilen > maxi) {          return -1;      }      write_op_arg(codestr + maxi - ilen, op, oparg, ilen); -    memset(codestr + i, NOP, maxi - i - ilen); -    return maxi - 2; +    fill_nops(codestr, i, maxi - ilen); +    return maxi - 1;  }  /* Replace LOAD_CONST c1, LOAD_CONST c2 ... LOAD_CONST cn, BUILD_TUPLE n @@ -170,7 +177,7 @@ copy_op_arg(unsigned char *codestr, Py_ssize_t i, unsigned char op,     test; for BUILD_SET it assembles a frozenset rather than a tuple.  */  static Py_ssize_t -fold_tuple_on_constants(unsigned char *codestr, Py_ssize_t c_start, +fold_tuple_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,                          Py_ssize_t opcode_end, unsigned char opcode,                          PyObject *consts, PyObject **objs, int n)  { @@ -222,7 +229,7 @@ fold_tuple_on_constants(unsigned char *codestr, Py_ssize_t c_start,     becoming large in the presence of code like:  (None,)*1000.  */  static Py_ssize_t -fold_binops_on_constants(unsigned char *codestr, Py_ssize_t c_start, +fold_binops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,                           Py_ssize_t opcode_end, unsigned char opcode,                           PyObject *consts, PyObject **objs)  { @@ -311,7 +318,7 @@ fold_binops_on_constants(unsigned char *codestr, Py_ssize_t c_start,  }  static Py_ssize_t -fold_unaryops_on_constants(unsigned char *codestr, Py_ssize_t c_start, +fold_unaryops_on_constants(_Py_CODEUNIT *codestr, Py_ssize_t c_start,                             Py_ssize_t opcode_end, unsigned char opcode,                             PyObject *consts, PyObject *v)  { @@ -359,7 +366,7 @@ fold_unaryops_on_constants(unsigned char *codestr, Py_ssize_t c_start,  }  static unsigned int * -markblocks(unsigned char *code, Py_ssize_t len) +markblocks(_Py_CODEUNIT *code, Py_ssize_t len)  {      unsigned int *blocks = PyMem_New(unsigned int, len);      int i, j, opcode, blockcnt = 0; @@ -371,8 +378,8 @@ markblocks(unsigned char *code, Py_ssize_t len)      memset(blocks, 0, len*sizeof(int));      /* Mark labels in the first pass */ -    for (i=0 ; i<len ; i+=2) { -        opcode = code[i]; +    for (i = 0; i < len; i++) { +        opcode = _Py_OPCODE(code[i]);          switch (opcode) {              case FOR_ITER:              case JUMP_FORWARD: @@ -388,12 +395,13 @@ markblocks(unsigned char *code, Py_ssize_t len)              case SETUP_WITH:              case SETUP_ASYNC_WITH:                  j = GETJUMPTGT(code, i); +                assert(j < len);                  blocks[j] = 1;                  break;          }      }      /* Build block numbers in the second pass */ -    for (i=0 ; i<len ; i+=2) { +    for (i = 0; i < len; i++) {          blockcnt += blocks[i];          /* increment blockcnt over labels */          blocks[i] = blockcnt;      } @@ -420,7 +428,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      Py_ssize_t h, i, nexti, op_start, codelen, tgt;      unsigned int j, nops;      unsigned char opcode, nextop; -    unsigned char *codestr = NULL; +    _Py_CODEUNIT *codestr = NULL;      unsigned char *lnotab;      unsigned int cum_orig_offset, last_offset;      Py_ssize_t tabsiz; @@ -448,16 +456,16 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      assert(PyBytes_Check(code));      codelen = PyBytes_GET_SIZE(code); -    assert(codelen % 2 == 0); +    assert(codelen % sizeof(_Py_CODEUNIT) == 0);      /* Make a modifiable copy of the code string */ -    codestr = (unsigned char *)PyMem_Malloc(codelen); +    codestr = (_Py_CODEUNIT *)PyMem_Malloc(codelen);      if (codestr == NULL) {          PyErr_NoMemory();          goto exitError;      } -    codestr = (unsigned char *)memcpy(codestr, -                                      PyBytes_AS_STRING(code), codelen); +    memcpy(codestr, PyBytes_AS_STRING(code), codelen); +    codelen /= sizeof(_Py_CODEUNIT);      blocks = markblocks(codestr, codelen);      if (blocks == NULL) @@ -469,14 +477,14 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      for (i=find_op(codestr, 0) ; i<codelen ; i=nexti) {          opcode = codestr[i];          op_start = i; -        while (op_start >= 2 && codestr[op_start-2] == EXTENDED_ARG) { -            op_start -= 2; +        while (op_start >= 1 && _Py_OPCODE(codestr[op_start-1]) == EXTENDED_ARG) { +            op_start--;          } -        nexti = i + 2; -        while (nexti < codelen && codestr[nexti] == EXTENDED_ARG) -            nexti += 2; -        nextop = nexti < codelen ? codestr[nexti] : 0; +        nexti = i + 1; +        while (nexti < codelen && _Py_OPCODE(codestr[nexti]) == EXTENDED_ARG) +            nexti++; +        nextop = nexti < codelen ? _Py_OPCODE(codestr[nexti]) : 0;          if (!in_consts) {              CONST_STACK_RESET(); @@ -488,10 +496,10 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                 with    POP_JUMP_IF_TRUE */              case UNARY_NOT:                  if (nextop != POP_JUMP_IF_FALSE -                    || !ISBASICBLOCK(blocks, op_start, i+2)) +                    || !ISBASICBLOCK(blocks, op_start, i + 1))                      break; -                memset(codestr + op_start, NOP, i - op_start + 2); -                codestr[nexti] = POP_JUMP_IF_TRUE; +                fill_nops(codestr, op_start, i + 1); +                codestr[nexti] = PACKOPARG(POP_JUMP_IF_TRUE, _Py_OPARG(codestr[nexti]));                  break;                  /* not a is b -->  a is not b @@ -503,10 +511,10 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                  j = get_arg(codestr, i);                  if (j < 6 || j > 9 ||                      nextop != UNARY_NOT || -                    !ISBASICBLOCK(blocks, op_start, i + 2)) +                    !ISBASICBLOCK(blocks, op_start, i + 1))                      break; -                codestr[i+1] = (j^1); -                memset(codestr + i + 2, NOP, nexti - i); +                codestr[i] = PACKOPARG(opcode, j^1); +                fill_nops(codestr, i + 1, nexti + 1);                  break;                  /* Skip over LOAD_CONST trueconst @@ -515,10 +523,10 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,              case LOAD_CONST:                  CONST_STACK_PUSH_OP(i);                  if (nextop != POP_JUMP_IF_FALSE  || -                    !ISBASICBLOCK(blocks, op_start, i + 2)  || +                    !ISBASICBLOCK(blocks, op_start, i + 1)  ||                      !PyObject_IsTrue(PyList_GET_ITEM(consts, get_arg(codestr, i))))                      break; -                memset(codestr + op_start, NOP, nexti - op_start + 2); +                fill_nops(codestr, op_start, nexti + 1);                  CONST_STACK_POP(1);                  break; @@ -537,10 +545,10 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                            ISBASICBLOCK(blocks, h, op_start)) ||                           ((opcode == BUILD_LIST || opcode == BUILD_SET) &&                            ((nextop==COMPARE_OP && -                          (codestr[nexti+1]==6 || -                           codestr[nexti+1]==7)) || -                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 2))) { -                        h = fold_tuple_on_constants(codestr, h, i+2, opcode, +                          (_Py_OPARG(codestr[nexti]) == PyCmp_IN || +                           _Py_OPARG(codestr[nexti]) == PyCmp_NOT_IN)) || +                          nextop == GET_ITER) && ISBASICBLOCK(blocks, h, i + 1))) { +                        h = fold_tuple_on_constants(codestr, h, i + 1, opcode,                                                      consts, CONST_STACK_LASTN(j), j);                          if (h >= 0) {                              CONST_STACK_POP(j); @@ -550,23 +558,20 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                      }                  }                  if (nextop != UNPACK_SEQUENCE  || -                    !ISBASICBLOCK(blocks, op_start, i + 2) || +                    !ISBASICBLOCK(blocks, op_start, i + 1) ||                      j != get_arg(codestr, nexti) ||                      opcode == BUILD_SET)                      break;                  if (j < 2) { -                    memset(codestr+op_start, NOP, nexti - op_start + 2); +                    fill_nops(codestr, op_start, nexti + 1);                  } else if (j == 2) { -                    codestr[op_start] = ROT_TWO; -                    codestr[op_start + 1] = 0; -                    memset(codestr + op_start + 2, NOP, nexti - op_start); +                    codestr[op_start] = PACKOPARG(ROT_TWO, 0); +                    fill_nops(codestr, op_start + 1, nexti + 1);                      CONST_STACK_RESET();                  } else if (j == 3) { -                    codestr[op_start] = ROT_THREE; -                    codestr[op_start + 1] = 0; -                    codestr[op_start + 2] = ROT_TWO; -                    codestr[op_start + 3] = 0; -                    memset(codestr + op_start + 4, NOP, nexti - op_start - 2); +                    codestr[op_start] = PACKOPARG(ROT_THREE, 0); +                    codestr[op_start + 1] = PACKOPARG(ROT_TWO, 0); +                    fill_nops(codestr, op_start + 2, nexti + 1);                      CONST_STACK_RESET();                  }                  break; @@ -590,7 +595,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                      break;                  h = lastn_const_start(codestr, op_start, 2);                  if (ISBASICBLOCK(blocks, h, op_start)) { -                    h = fold_binops_on_constants(codestr, h, i+2, opcode, +                    h = fold_binops_on_constants(codestr, h, i + 1, opcode,                                                   consts, CONST_STACK_LASTN(2));                      if (h >= 0) {                          CONST_STACK_POP(2); @@ -608,7 +613,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                      break;                  h = lastn_const_start(codestr, op_start, 1);                  if (ISBASICBLOCK(blocks, h, op_start)) { -                    h = fold_unaryops_on_constants(codestr, h, i+2, opcode, +                    h = fold_unaryops_on_constants(codestr, h, i + 1, opcode,                                                     consts, *CONST_STACK_LASTN(1));                      if (h >= 0) {                          CONST_STACK_POP(1); @@ -628,15 +633,15 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                     x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_FALSE_OR_POP z                        -->  x:JUMP_IF_FALSE_OR_POP z                     x:JUMP_IF_FALSE_OR_POP y   y:JUMP_IF_TRUE_OR_POP z -                      -->  x:POP_JUMP_IF_FALSE y+2 -                   where y+2 is the instruction following the second test. +                      -->  x:POP_JUMP_IF_FALSE y+1 +                   where y+1 is the instruction following the second test.                  */              case JUMP_IF_FALSE_OR_POP:              case JUMP_IF_TRUE_OR_POP: -                h = get_arg(codestr, i); +                h = get_arg(codestr, i) / sizeof(_Py_CODEUNIT);                  tgt = find_op(codestr, h); -                j = codestr[tgt]; +                j = _Py_OPCODE(codestr[tgt]);                  if (CONDITIONAL_JUMP(j)) {                      /* NOTE: all possible jumps here are absolute. */                      if (JUMPS_ON_TRUE(j) == JUMPS_ON_TRUE(opcode)) { @@ -649,14 +654,14 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                             jump past it), and all conditional jumps pop their                             argument when they're not taken (so change the                             first jump to pop its argument when it's taken). */ -                        h = set_arg(codestr, i, tgt + 2); +                        h = set_arg(codestr, i, (tgt + 1) * sizeof(_Py_CODEUNIT));                          j = opcode == JUMP_IF_TRUE_OR_POP ?                              POP_JUMP_IF_TRUE : POP_JUMP_IF_FALSE;                      }                      if (h >= 0) {                          nexti = h; -                        codestr[nexti] = j; +                        codestr[nexti] = PACKOPARG(j, _Py_OPARG(codestr[nexti]));                          break;                      }                  } @@ -678,32 +683,32 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,                  tgt = find_op(codestr, h);                  /* Replace JUMP_* to a RETURN into just a RETURN */                  if (UNCONDITIONAL_JUMP(opcode) && -                    codestr[tgt] == RETURN_VALUE) { -                    codestr[op_start] = RETURN_VALUE; -                    codestr[op_start + 1] = 0; -                    memset(codestr + op_start + 2, NOP, i - op_start); -                } else if (UNCONDITIONAL_JUMP(codestr[tgt])) { +                    _Py_OPCODE(codestr[tgt]) == RETURN_VALUE) { +                    codestr[op_start] = PACKOPARG(RETURN_VALUE, 0); +                    fill_nops(codestr, op_start + 1, i + 1); +                } else if (UNCONDITIONAL_JUMP(_Py_OPCODE(codestr[tgt]))) {                      j = GETJUMPTGT(codestr, tgt);                      if (opcode == JUMP_FORWARD) { /* JMP_ABS can go backwards */                          opcode = JUMP_ABSOLUTE;                      } else if (!ABSOLUTE_JUMP(opcode)) { -                        if ((Py_ssize_t)j < i + 2) { +                        if ((Py_ssize_t)j < i + 1) {                              break;           /* No backward relative jumps */                          } -                        j -= i + 2;          /* Calc relative jump addr */ +                        j -= i + 1;          /* Calc relative jump addr */                      } -                    copy_op_arg(codestr, op_start, opcode, j, i+2); +                    j *= sizeof(_Py_CODEUNIT); +                    copy_op_arg(codestr, op_start, opcode, j, i + 1);                  }                  break;                  /* Remove unreachable ops after RETURN */              case RETURN_VALUE: -                h = i + 2; -                while (h + 2 < codelen && ISBASICBLOCK(blocks, i, h + 2)) { -                    h += 2; +                h = i + 1; +                while (h + 1 < codelen && ISBASICBLOCK(blocks, i, h + 1)) { +                    h++;                  } -                if (h > i + 2) { -                    memset(codestr + i + 2, NOP, h - i); +                if (h > i + 1) { +                    fill_nops(codestr, i + 1, h + 1);                      nexti = find_op(codestr, h);                  }                  break; @@ -711,20 +716,21 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      }      /* Fixup lnotab */ -    for (i=0, nops=0 ; i<codelen ; i += 2) { +    for (i = 0, nops = 0; i < codelen; i++) {          assert(i - nops <= INT_MAX);          /* original code offset => new code offset */          blocks[i] = i - nops; -        if (codestr[i] == NOP) -            nops += 2; +        if (_Py_OPCODE(codestr[i]) == NOP) +            nops++;      }      cum_orig_offset = 0;      last_offset = 0;      for (i=0 ; i < tabsiz ; i+=2) {          unsigned int offset_delta, new_offset;          cum_orig_offset += lnotab[i]; -        assert((cum_orig_offset & 1) == 0); -        new_offset = blocks[cum_orig_offset]; +        assert(cum_orig_offset % sizeof(_Py_CODEUNIT) == 0); +        new_offset = blocks[cum_orig_offset / sizeof(_Py_CODEUNIT)] * +                sizeof(_Py_CODEUNIT);          offset_delta = new_offset - last_offset;          assert(offset_delta <= 255);          lnotab[i] = (unsigned char)offset_delta; @@ -732,13 +738,13 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      }      /* Remove NOPs and fixup jump targets */ -    for (op_start=0, i=0, h=0 ; i<codelen ; i+=2, op_start=i) { -        j = codestr[i+1]; -        while (codestr[i] == EXTENDED_ARG) { -            i += 2; -            j = j<<8 | codestr[i+1]; +    for (op_start = i = h = 0; i < codelen; i++, op_start = i) { +        j = _Py_OPARG(codestr[i]); +        while (_Py_OPCODE(codestr[i]) == EXTENDED_ARG) { +            i++; +            j = j<<8 | _Py_OPARG(codestr[i]);          } -        opcode = codestr[i]; +        opcode = _Py_OPCODE(codestr[i]);          switch (opcode) {              case NOP:continue; @@ -748,7 +754,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,              case POP_JUMP_IF_TRUE:              case JUMP_IF_FALSE_OR_POP:              case JUMP_IF_TRUE_OR_POP: -                j = blocks[j]; +                j = blocks[j / sizeof(_Py_CODEUNIT)] * sizeof(_Py_CODEUNIT);                  break;              case FOR_ITER: @@ -758,10 +764,11 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,              case SETUP_FINALLY:              case SETUP_WITH:              case SETUP_ASYNC_WITH: -                j = blocks[j + i + 2] - blocks[i] - 2; +                j = blocks[j / sizeof(_Py_CODEUNIT) + i + 1] - blocks[i] - 1; +                j *= sizeof(_Py_CODEUNIT);                  break;          } -        nexti = i - op_start + 2; +        nexti = i - op_start + 1;          if (instrsize(j) > nexti)              goto exitUnchanged;          /* If instrsize(j) < nexti, we'll emit EXTENDED_ARG 0 */ @@ -772,7 +779,7 @@ PyCode_Optimize(PyObject *code, PyObject* consts, PyObject *names,      CONST_STACK_DELETE();      PyMem_Free(blocks); -    code = PyBytes_FromStringAndSize((char *)codestr, h); +    code = PyBytes_FromStringAndSize((char *)codestr, h * sizeof(_Py_CODEUNIT));      PyMem_Free(codestr);      return code; diff --git a/Python/wordcode_helpers.h b/Python/wordcode_helpers.h index b61ba33d8f..b0e3a91776 100644 --- a/Python/wordcode_helpers.h +++ b/Python/wordcode_helpers.h @@ -2,35 +2,38 @@     optimizer.   */ -/* Minimum number of bytes necessary to encode instruction with EXTENDED_ARGs */ +#ifdef WORDS_BIGENDIAN +#  define PACKOPARG(opcode, oparg) ((_Py_CODEUNIT)(((opcode) << 8) | (oparg))) +#else +#  define PACKOPARG(opcode, oparg) ((_Py_CODEUNIT)(((oparg) << 8) | (opcode))) +#endif + +/* Minimum number of code units necessary to encode instruction with +   EXTENDED_ARGs */  static int  instrsize(unsigned int oparg)  { -    return oparg <= 0xff ? 2 : -        oparg <= 0xffff ? 4 : -        oparg <= 0xffffff ? 6 : -        8; +    return oparg <= 0xff ? 1 : +        oparg <= 0xffff ? 2 : +        oparg <= 0xffffff ? 3 : +        4;  }  /* Spits out op/oparg pair using ilen bytes. codestr should be pointed at the     desired location of the first EXTENDED_ARG */  static void -write_op_arg(unsigned char *codestr, unsigned char opcode, +write_op_arg(_Py_CODEUNIT *codestr, unsigned char opcode,      unsigned int oparg, int ilen)  {      switch (ilen) { -        case 8: -            *codestr++ = EXTENDED_ARG; -            *codestr++ = (oparg >> 24) & 0xff; -        case 6: -            *codestr++ = EXTENDED_ARG; -            *codestr++ = (oparg >> 16) & 0xff;          case 4: -            *codestr++ = EXTENDED_ARG; -            *codestr++ = (oparg >> 8) & 0xff; +            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 24) & 0xff); +        case 3: +            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 16) & 0xff);          case 2: -            *codestr++ = opcode; -            *codestr++ = oparg & 0xff; +            *codestr++ = PACKOPARG(EXTENDED_ARG, (oparg >> 8) & 0xff); +        case 1: +            *codestr++ = PACKOPARG(opcode, oparg & 0xff);              break;          default:              assert(0); | 
