| author | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
|---|---|---|
| committer | Cherry Zhang <cherryyz@google.com> | 2020-10-28 09:12:20 -0400 |
| commit | a16e30d162c1c7408db7821e7b9513cefa09c6ca (patch) | |
| tree | af752ba9ba44c547df39bb0af9bff79f610ba9d5 /src/cmd/compile/internal/ssa/gen | |
| parent | 91e4d2d57bc341dd82c98247117114c851380aef (diff) | |
| parent | cf6cfba4d5358404dd890f6025e573a4b2156543 (diff) | |
[dev.link] all: merge branch 'master' into dev.link
Clean merge.
Change-Id: Ia7b2808bc649790198d34c226a61d9e569084dc5
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen')
19 files changed, 521 insertions, 331 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 4a8244eb27..4e6cc8c692 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -38,10 +38,8 @@
 (Xor(32|16|8) ...) => (XORL ...)
 
 (Neg(32|16|8) ...) => (NEGL ...)
-(Neg32F x) && !config.use387 => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
-(Neg64F x) && !config.use387 => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
-(Neg32F x) && config.use387 => (FCHS x)
-(Neg64F x) && config.use387 => (FCHS x)
+(Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
+(Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
 
 (Com(32|16|8) ...) => (NOTL ...)
@@ -312,7 +310,7 @@
 (Const32 ...) => (MOVLconst ...)
 (Const(32|64)F ...) => (MOVS(S|D)const ...)
 (ConstNil) => (MOVLconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
 
 // Lowering calls
 (StaticCall ...) => (CALLstatic ...)
@@ -670,8 +668,8 @@
 // Merge load/store to op
 ((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
 (MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
 (MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) => ((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
diff --git a/src/cmd/compile/internal/ssa/gen/386Ops.go b/src/cmd/compile/internal/ssa/gen/386Ops.go
index ddabde7d3d..737b99c371 100644
--- a/src/cmd/compile/internal/ssa/gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/386Ops.go
@@ -51,17 +51,6 @@ var regNames386 = []string{
 	"SB",
 }
 
-// Notes on 387 support.
-//  - The 387 has a weird stack-register setup for floating-point registers.
-//    We use these registers when SSE registers are not available (when GO386=387).
-//  - We use the same register names (X0-X7) but they refer to the 387
-//    floating-point registers. That way, most of the SSA backend is unchanged.
-//  - The instruction generation pass maintains an SSE->387 register mapping.
-//    This mapping is updated whenever the FP stack is pushed or popped so that
-//    we can always find a given SSE register even when the TOS pointer has changed.
-//  - To facilitate the mapping from SSE to 387, we enforce that
-//    every basic block starts and ends with an empty floating-point stack.
-
 func init() {
 	// Make map from reg names to reg integers.
 	if len(regNames386) > 64 {
@@ -552,9 +541,6 @@ func init() {
 		{name: "FlagGT_UGT"}, // signed > and unsigned >
 		{name: "FlagGT_ULT"}, // signed > and unsigned <
 
-		// Special op for -x on 387
-		{name: "FCHS", argLength: 1, reg: fp11},
-
 		// Special ops for PIC floating-point constants.
 		// MOVSXconst1 loads the address of the constant-pool entry into a register.
 		// MOVSXconst2 loads the constant from that address.
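The 386 port now always assumes SSE2, so the float-negation rules lose their `config.use387` guards and lower unconditionally to a PXOR against the sign-bit constant rather than the x87 `FCHS`. The trick is plain bit manipulation; a standalone Go sketch of the same idea (helper name ours, not compiler code):

```go
package main

import (
	"fmt"
	"math"
)

// negate32 flips only the sign bit, which is what the PXOR rule emits:
// math.Copysign(0, -1) is -0.0, whose bit pattern is exactly the sign bit.
func negate32(x float32) float32 {
	sign := math.Float32bits(float32(math.Copysign(0, -1))) // 0x80000000
	return math.Float32frombits(math.Float32bits(x) ^ sign)
}

func main() {
	fmt.Println(negate32(3.5), negate32(-3.5)) // -3.5 3.5
}
```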
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 408678f054..934e7dfdb6 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -401,7 +401,7 @@
 (Const32F ...) => (MOVSSconst ...)
 (Const64F ...) => (MOVSDconst ...)
 (ConstNil ) => (MOVQconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
 
 // Lowering calls
 (StaticCall ...) => (CALLstatic ...)
@@ -530,8 +530,10 @@
 (AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)
 
 // Atomic memory updates.
-(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
-(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
+(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem)
+(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicOr32 ptr val mem) => (ORLlock ptr val mem)
 
 // Write barrier.
 (WB ...) => (LoweredWB ...)
@@ -957,7 +959,7 @@
 (MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
 (MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
 
-(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo64(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
 (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x)
 (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x)
 (MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x)
@@ -1274,8 +1276,8 @@
 (CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
 (CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
 (CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
-(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= m && int16(m) < n => (FlagLT_ULT)
-(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= m && int8(m) < n => (FlagLT_ULT)
+(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < n => (FlagLT_ULT)
+(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < n => (FlagLT_ULT)
 
 // TESTQ c c sets flags like CMPQ c 0.
 (TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c == 0 => (FlagEQ)
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 2df5016d59..de5372670b 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -902,7 +902,9 @@ func init() {
 		// Atomic memory updates.
 		{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
+		{name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
 		{name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
+		{name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
 	}
 
 	var AMD64blocks = []blockData{
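ANDLlock/ORLlock give the new AtomicAnd32/AtomicOr32 intrinsics a single LOCK-prefixed instruction on amd64. On a target without such an instruction the same semantics fall out of a compare-and-swap loop; a hedged sketch using only the long-standing sync/atomic API (helper name ours):

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// andUint32 performs *addr &= mask atomically — the operation the
// ANDLlock op above does in one LOCK ANDL instruction.
func andUint32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old&mask) {
			return
		}
	}
}

func main() {
	v := uint32(0xFF)
	andUint32(&v, 0x0F)
	fmt.Printf("%#x\n", v) // 0xf
}
```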
{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 + {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1 {name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 + {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1 } var AMD64blocks = []blockData{ diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules index 9490805f46..f48abcd202 100644 --- a/src/cmd/compile/internal/ssa/gen/ARM.rules +++ b/src/cmd/compile/internal/ssa/gen/ARM.rules @@ -169,10 +169,10 @@ (Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [31]) // constants -(Const(8|16|32) ...) -> (MOVWconst ...) -(Const(32F|64F) ...) -> (MOV(F|D)const ...) +(Const(8|16|32) [val]) => (MOVWconst [int32(val)]) +(Const(32|64)F [val]) => (MOV(F|D)const [float64(val)]) (ConstNil) => (MOVWconst [0]) -(ConstBool ...) -> (MOVWconst ...) +(ConstBool [b]) => (MOVWconst [b2i32(b)]) // truncations // Because we ignore high parts of registers, truncates are just copies. @@ -243,10 +243,10 @@ (Leq16U x y) => (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y))) (Leq32U x y) => (LessEqualU (CMP x y)) -(OffPtr [off] ptr:(SP)) -> (MOVWaddr [off] ptr) -(OffPtr [off] ptr) -> (ADDconst [off] ptr) +(OffPtr [off] ptr:(SP)) => (MOVWaddr [int32(off)] ptr) +(OffPtr [off] ptr) => (ADDconst [int32(off)] ptr) -(Addr ...) -> (MOVWaddr ...) 
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c4a3532632..c50a8c7778 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -548,8 +548,10 @@
 (AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
 
 // Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
+(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
+(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
 
 (AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
@@ -1171,145 +1173,145 @@
 (MUL x (MOVDconst [-1])) => (NEG x)
 (MUL _ (MOVDconst [0])) => (MOVDconst [0])
 (MUL x (MOVDconst [1])) => x
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log2(c)] x)
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log2(c-1)])
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 (MULW x (MOVDconst [c])) && int32(c)==-1 => (NEG x)
 (MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
 (MULW x (MOVDconst [c])) && int32(c)==1 => x
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log2(c)] x)
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log2(c-1)])
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
 
 // mneg by constant
 (MNEG x (MOVDconst [-1])) => x
 (MNEG _ (MOVDconst [0])) => (MOVDconst [0])
 (MNEG x (MOVDconst [1])) => (NEG x)
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
-(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
-(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
-(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
-(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
 
 (MNEGW x (MOVDconst [c])) && int32(c)==-1 => x
 (MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
 (MNEGW x (MOVDconst [c])) && int32(c)==1 => (NEG x)
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
-(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
-(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
-(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
-(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
 
 (MADD a x (MOVDconst [-1])) => (SUB a x)
 (MADD a _ (MOVDconst [0])) => a
 (MADD a x (MOVDconst [1])) => (ADD a x)
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MADD a (MOVDconst [-1]) x) => (SUB a x)
 (MADD a (MOVDconst [0]) _) => a
 (MADD a (MOVDconst [1]) x) => (ADD a x)
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MADDW a x (MOVDconst [c])) && int32(c)==-1 => (SUB a x)
 (MADDW a _ (MOVDconst [c])) && int32(c)==0 => a
 (MADDW a x (MOVDconst [c])) && int32(c)==1 => (ADD a x)
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (SUB a x)
 (MADDW a (MOVDconst [c]) _) && int32(c)==0 => a
 (MADDW a (MOVDconst [c]) x) && int32(c)==1 => (ADD a x)
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MSUB a x (MOVDconst [-1])) => (ADD a x)
 (MSUB a _ (MOVDconst [0])) => a
 (MSUB a x (MOVDconst [1])) => (SUB a x)
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MSUB a (MOVDconst [-1]) x) => (ADD a x)
 (MSUB a (MOVDconst [0]) _) => a
 (MSUB a (MOVDconst [1]) x) => (SUB a x)
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (ADD a x)
 (MSUBW a _ (MOVDconst [c])) && int32(c)==0 => a
 (MSUBW a x (MOVDconst [c])) && int32(c)==1 => (SUB a x)
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 (MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (ADD a x)
 (MSUBW a (MOVDconst [c]) _) && int32(c)==0 => a
 (MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (SUB a x)
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
 
 // div by constant
 (UDIV x (MOVDconst [1])) => x
-(UDIV x (MOVDconst [c])) && isPowerOfTwo(c) => (SRLconst [log2(c)] x)
+(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log2(c)] x)
 (UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
-(UDIVW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (SRLconst [log2(c)] x)
+(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log2(c)] x)
 (UMOD _ (MOVDconst [1])) => (MOVDconst [0])
-(UMOD x (MOVDconst [c])) && isPowerOfTwo(c) => (ANDconst [c-1] x)
+(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
 (UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
-(UMODW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (ANDconst [c-1] x)
+(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
 
 // generic simplifications
 (ADD x (NEG y)) => (SUB x y)
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9ff53f7e4e..fe9edbf933 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -656,12 +656,14 @@ func init() {
 		// atomic and/or.
 		// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
-		// LDAXRB	(Rarg0), Rout
+		// LDAXR	(Rarg0), Rout
 		// AND/OR	Rarg1, Rout
-		// STLXRB	Rout, (Rarg0), Rtmp
+		// STLXR	Rout, (Rarg0), Rtmp
 		// CBNZ		Rtmp, -3(PC)
 		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+		{name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It saves all GP registers if necessary,
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS.rules b/src/cmd/compile/internal/ssa/gen/MIPS.rules
index 96feaf9234..b6e5312224 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS.rules
@@ -143,7 +143,7 @@
 (Const(32|16|8) [val]) => (MOVWconst [int32(val)])
 (Const(32|64)F ...) => (MOV(F|D)const ...)
 (ConstNil) => (MOVWconst [0])
-(ConstBool [b]) => (MOVWconst [int32(b2i(b))])
+(ConstBool [b]) => (MOVWconst [b2i32(b)])
 
 // truncations
 // Because we ignore high parts of registers, truncates are just copies.
@@ -383,6 +383,9 @@
 			(ANDconst <typ.UInt32> [3]
 				(XORconst <typ.UInt32> [3] ptr)))))) mem)
 
+(AtomicAnd32 ...) => (LoweredAtomicAnd ...)
+(AtomicOr32 ...) => (LoweredAtomicOr ...)
+
 // checks
 (NilCheck ...) => (LoweredNilCheck ...)
@@ -581,13 +584,13 @@
 (Select0 (MULTU (MOVWconst [1]) _ )) => (MOVWconst [0])
 (Select1 (MULTU (MOVWconst [-1]) x )) => (NEG <x.Type> x)
 (Select0 (MULTU (MOVWconst [-1]) x )) => (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
-(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
-(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
+(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
 
 (MUL (MOVWconst [0]) _ ) => (MOVWconst [0])
 (MUL (MOVWconst [1]) x ) => x
 (MUL (MOVWconst [-1]) x ) => (NEG x)
-(MUL (MOVWconst [c]) x ) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(MUL (MOVWconst [c]) x ) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
 
 // generic simplifications
 (ADD x (NEG y)) => (SUB x y)
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
index e008ec8703..8e4c3a07c8 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
@@ -580,13 +580,13 @@
 (Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x)
 (Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0])
 (Select1 (MULVU x (MOVVconst [1]))) => x
-(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SLLVconst [log2(c)] x)
+(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SLLVconst [log2(c)] x)
 
 // div by constant
 (Select1 (DIVVU x (MOVVconst [1]))) => x
-(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SRLVconst [log2(c)] x)
+(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SRLVconst [log2(c)] x)
 (Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod
-(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
+(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod
 
 // generic simplifications
 (ADDV x (NEGV y)) => (SUBV x y)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index de30d003e6..558b09c9f2 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -150,6 +150,31 @@
 (ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31])
 (ROTL x (MOVDconst [c])) => (ROTLconst x [c&63])
 
+// Combine rotate and mask operations
+(ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(ANDconst [m] (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+(AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+
+// Note, any rotated word bitmask is still a valid word bitmask.
+(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+(ROTLWconst [r] (ANDconst [m] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+
+(ANDconst [m] (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(ANDconst [m] (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+
+// Merge shift right + shift left and clear left (e.g for a table lookup)
+(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
+(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
+// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
+(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
+
 // large constant shifts
 (Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 => (MOVDconst [0])
@@ -821,6 +846,8 @@
 (ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
 
+(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x)
+
 // Subtract from (with carry, but ignored) constant.
 // Note, these clobber the carry bit.
 (SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
@@ -965,10 +992,10 @@
 // atomic intrinsics
 (AtomicLoad(8|32|64|Ptr) ptr mem) => (LoweredAtomicLoad(8|32|64|Ptr) [1] ptr mem)
-(AtomicLoadAcq32 ptr mem) => (LoweredAtomicLoad32 [0] ptr mem)
+(AtomicLoadAcq(32|64) ptr mem) => (LoweredAtomicLoad(32|64) [0] ptr mem)
 
 (AtomicStore(8|32|64) ptr val mem) => (LoweredAtomicStore(8|32|64) [1] ptr val mem)
-(AtomicStoreRel32 ptr val mem) => (LoweredAtomicStore32 [0] ptr val mem)
+(AtomicStoreRel(32|64) ptr val mem) => (LoweredAtomicStore(32|64) [0] ptr val mem)
 //(AtomicStorePtrNoWB ptr val mem) => (STLR ptr val mem)
 
 (AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
@@ -978,8 +1005,10 @@
 (AtomicCompareAndSwap(32|64) ptr old new_ mem) => (LoweredAtomicCas(32|64) [1] ptr old new_ mem)
 (AtomicCompareAndSwapRel32 ptr old new_ mem) => (LoweredAtomicCas32 [0] ptr old new_ mem)
 
-(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
-(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
 
 (Slicemask <t> x) => (SRADconst (NEG <t> x) [63])
@@ -1018,13 +1047,12 @@
 (SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x)
 (SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x)
-(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
-(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
 (SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x)
 (SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x)
-(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x)
-(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
-(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
 // special case for power9
 (SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x)
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 28317928a8..f7198b90c3 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -137,6 +137,7 @@ func init() {
 		gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
 		gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
 		gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
+		gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
 		gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
 		gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
 		gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
@@ -181,6 +182,8 @@ func init() {
 		{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
 		{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
+		{name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
+		{name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
 		{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
 
 		{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
@@ -225,6 +228,10 @@ func init() {
 		{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
 		{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
 
+		{name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"},                      // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
+		{name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"},                       // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
+		{name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
+
 		{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
 		{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
@@ -600,25 +607,22 @@ func init() {
 		{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
 
 		// atomic add32, 64
-		// SYNC
+		// LWSYNC
 		// LDAR	(Rarg0), Rout
 		// ADD	Rarg1, Rout
 		// STDCCC Rout, (Rarg0)
 		// BNE	-3(PC)
-		// ISYNC
 		// return new sum
-
 		{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 		{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 
 		// atomic exchange32, 64
-		// SYNC
+		// LWSYNC
 		// LDAR	(Rarg0), Rout
 		// STDCCC Rarg1, (Rarg0)
 		// BNE	-2(PC)
 		// ISYNC
 		// return old val
-
 		{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 		{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
@@ -641,15 +645,16 @@ func init() {
 		{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 		{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
 
-		// atomic 8 and/or.
+		// atomic 8/32 and/or.
 		// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
-		// LBAR	(Rarg0), Rtmp
+		// LBAR/LWAT	(Rarg0), Rtmp
 		// AND/OR	Rarg1, Rtmp
-		// STBCCC	Rtmp, (Rarg0), Rtmp
+		// STBCCC/STWCCC	Rtmp, (Rarg0), Rtmp
 		// BNE	Rtmp, -3(PC)
-		{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
 		{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
+		{name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
 
 		// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
 		// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
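The new PPC64 RLWINM/RLWNM ops pack a rotate amount and a 32-bit mask into one aux value via encodePPC64RotateMask. Functionally, rlwinm is rotate-left-then-AND; a sketch of just the semantics in plain Go (not the compiler's encoder):

```go
package main

import (
	"fmt"
	"math/bits"
)

// rlwinm models what the PPC64 instruction computes: rotate the low
// 32 bits left by rot, then keep only the bits selected by mask.
func rlwinm(x uint32, rot int, mask uint32) uint32 {
	return bits.RotateLeft32(x, rot) & mask
}

func main() {
	// The table-lookup pattern cited above, (x>>14)&0xFF, is a single
	// rlwinm with rot = 32-14 = 18 and mask = 0xFF.
	x := uint32(0xDEADBEEF)
	fmt.Printf("%#x %#x\n", (x>>14)&0xFF, rlwinm(x, 18, 0xFF)) // equal
}
```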
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 9437c8e9d4..325cbeb825 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -3,11 +3,7 @@
 // license that can be found in the LICENSE file.
 
 // Optimizations TODO:
-// * Somehow track when values are already zero/signed-extended, avoid re-extending.
 // * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
-// * Find a more efficient way to do zero/sign extension than left+right shift.
-//   There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...),
-//   but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway.
 // * Use the zero register instead of moving 0 into a register.
 // * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
 // * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
@@ -66,8 +62,8 @@
 (Mod32u ...) => (REMUW ...)
 (Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y))
 (Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
+(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
 
 (And64 ...) => (AND ...)
 (And32 ...) => (AND ...)
@@ -98,25 +94,21 @@
 (Sqrt ...) => (FSQRTD ...)
 
-// Zero and sign extension
-// Shift left until the bits we want are at the top of the register.
-// Then logical/arithmetic shift right for zero/sign extend.
-// We always extend to 64 bits; there's no reason not to,
-// and optimization rules can then collapse some extensions.
-
-(SignExt8to16 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt8to32 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt8to64 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt16to32 <t> x) => (SRAI [48] (SLLI <t> [48] x))
-(SignExt16to64 <t> x) => (SRAI [48] (SLLI <t> [48] x))
-(SignExt32to64 <t> x) => (ADDIW [0] x)
-
-(ZeroExt8to16 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt8to32 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt8to64 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt16to32 <t> x) => (SRLI [48] (SLLI <t> [48] x))
-(ZeroExt16to64 <t> x) => (SRLI [48] (SLLI <t> [48] x))
-(ZeroExt32to64 <t> x) => (SRLI [32] (SLLI <t> [32] x))
+// Sign and zero extension.
+
+(SignExt8to16 ...) => (MOVBreg ...)
+(SignExt8to32 ...) => (MOVBreg ...)
+(SignExt8to64 ...) => (MOVBreg ...)
+(SignExt16to32 ...) => (MOVHreg ...)
+(SignExt16to64 ...) => (MOVHreg ...)
+(SignExt32to64 ...) => (MOVWreg ...)
+
+(ZeroExt8to16 ...) => (MOVBUreg ...)
+(ZeroExt8to32 ...) => (MOVBUreg ...)
+(ZeroExt8to64 ...) => (MOVBUreg ...)
+(ZeroExt16to32 ...) => (MOVHUreg ...)
+(ZeroExt16to64 ...) => (MOVHUreg ...)
+(ZeroExt32to64 ...) => (MOVWUreg ...)
 
 (Cvt32to32F ...) => (FCVTSW ...)
 (Cvt32to64F ...) => (FCVTDW ...)
@@ -261,16 +253,16 @@
 (EqPtr x y) => (SEQZ (SUB <x.Type> x y))
 (Eq64 x y) => (SEQZ (SUB <x.Type> x y))
 (Eq32 x y) => (SEQZ (SUBW <x.Type> x y))
-(Eq16 x y) => (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Eq8 x y) => (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
 (Eq64F ...) => (FEQD ...)
 (Eq32F ...) => (FEQS ...)
 
 (NeqPtr x y) => (SNEZ (SUB <x.Type> x y))
 (Neq64 x y) => (SNEZ (SUB <x.Type> x y))
 (Neq32 x y) => (SNEZ (SUBW <x.Type> x y))
-(Neq16 x y) => (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Neq8 x y) => (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Neq16 x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Neq8 x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
 (Neq64F ...) => (FNED ...)
 (Neq32F ...) => (FNES ...)
@@ -291,8 +283,8 @@
 (Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
 (Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
 (Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
 
 // We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
 // knows what variables are being read/written by the ops.
@@ -368,6 +360,13 @@
 (Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst) mem)
 (Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst) mem)
 
+// Medium zeroing uses a Duff's device
+// 8 and 128 are magic constants, see runtime/mkduff.go
+(Zero [s] {t} ptr mem)
+	&& s%8 == 0 && s >= 16 && s <= 8*128
+	&& t.Alignment()%8 == 0 && !config.noDuffDevice =>
+	(DUFFZERO [8 * (128 - s/8)] ptr mem)
+
 // Generic zeroing uses a loop
 (Zero [s] {t} ptr mem) =>
 	(LoweredZero [t.Alignment()]
@@ -403,6 +402,13 @@
 (Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
 (Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
 
+// Medium move uses a Duff's device
+// 16 and 128 are magic constants, see runtime/mkduff.go
+(Move [s] {t} dst src mem)
+	&& s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
+	&& !config.noDuffDevice && logLargeCopy(v, s) =>
+	(DUFFCOPY [16 * (128 - s/8)] dst src mem)
+
 // Generic move uses a loop
 (Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
 	(LoweredMove [t.Alignment()]
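The DUFFZERO auxint is an entry offset into the runtime's unrolled zeroing loop: clearing s bytes needs s/8 of the 128 unrolled iterations, and each iteration is 8 bytes of instructions on riscv64 (16 for DUFFCOPY) per runtime/mkduff.go, hence 8 * (128 - s/8). A quick sanity check of that arithmetic (instruction sizes are our reading of mkduff.go):

```go
package main

import "fmt"

// duffZeroOffset mirrors the auxint in the DUFFZERO rule above: skip the
// iterations we don't need so the remaining tail clears exactly s bytes.
func duffZeroOffset(s int64) int64 { return 8 * (128 - s/8) }

func main() {
	fmt.Println(duffZeroOffset(16))      // 1008: run only the last 2 iterations
	fmt.Println(duffZeroOffset(8 * 128)) // 0: run all 128 iterations
}
```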
+// Avoid sign/zero extension after properly typed load.
+(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
+
+// Fold double extensions.
+(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
+
+// Do not extend before store.
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+
+// Replace extend after load with alternate load where possible.
+(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem)
+(MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem)
+(MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem)
+(MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem)
+(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
+(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
+
+// If a register move has only 1 use, just use the same register without emitting instruction
+// MOVnop does not emit an instruction, only for ensuring the type.
+(MOVDreg x) && x.Uses == 1 => (MOVDnop x)
+
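Taken together, these rules implement the TODO deleted at the top of the file: a MOV(B|H|W)(U)load already produces a correctly extended 64-bit value, so a following extension of the same or narrower width degrades into a plain register move (MOVDreg), and a single-use MOVDreg disappears entirely (MOVDnop). In source terms, something like the following sketch (illustrative, not from the patch) no longer pays for a shift pair:

    package p

    // b is produced by a zero-extending byte load (MOVBUload), so the
    // uint64 conversion lowers via (ZeroExt8to64 ...) => (MOVBUreg ...),
    // which then folds with the load:
    // (MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x).
    func load8(p *uint8) uint64 {
    	b := *p
    	return uint64(b) // no extension instruction remains
    }
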
 // Fold constant into immediate instructions where possible.
 (ADD (MOVBconst [val]) x) => (ADDI [int64(val)] x)
 (ADD (MOVHconst [val]) x) => (ADDI [int64(val)] x)
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
index b06b86075e..f64319230b 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
@@ -24,10 +24,11 @@ import (
 //  L = 64 bit int, used when the opcode starts with F
 
 const (
-	riscv64REG_G    = 4
+	riscv64REG_G    = 27
 	riscv64REG_CTXT = 20
 	riscv64REG_LR   = 1
 	riscv64REG_SP   = 2
+	riscv64REG_TP   = 4
 	riscv64REG_TMP  = 31
 	riscv64REG_ZERO = 0
 )
@@ -78,8 +79,8 @@ func init() {
 
 		// Add general purpose registers to gpMask.
 		switch r {
-		// ZERO, and TMP are not in any gp mask.
-		case riscv64REG_ZERO, riscv64REG_TMP:
+		// ZERO, TP and TMP are not in any gp mask.
+		case riscv64REG_ZERO, riscv64REG_TP, riscv64REG_TMP:
 		case riscv64REG_G:
 			gpgMask |= mask
 			gpspsbgMask |= mask
@@ -192,6 +193,17 @@ func init() {
 		{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits
 		{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"},  // 64 bits
 
+		// Conversions
+		{name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"},   // move from arg0, sign-extended from byte
+		{name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"},   // move from arg0, sign-extended from half
+		{name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"},   // move from arg0, sign-extended from word
+		{name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOV"},    // move from arg0
+		{name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, unsign-extended from byte
+		{name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, unsign-extended from half
+		{name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, unsign-extended from word
+
+		{name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}, resultInArg0: true}, // nop, return arg0 in same register
+
 		// Shift ops
 		{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << (aux1 & 63)
 		{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> (aux1 & 63), signed
@@ -228,6 +240,44 @@ func init() {
 		{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
 		{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true},     // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
 
+		// duffzero
+		// arg0 = address of memory to zero (in X10, changed as side effect)
+		// arg1 = mem
+		// auxint = offset into duffzero code to start executing
+		// X1 (link register) changed because of function call
+		// returns mem
+		{
+			name:      "DUFFZERO",
+			aux:       "Int64",
+			argLength: 2,
+			reg: regInfo{
+				inputs:   []regMask{regNamed["X10"]},
+				clobbers: regNamed["X1"] | regNamed["X10"],
+			},
+			typ:            "Mem",
+			faultOnNilArg0: true,
+		},
+
+		// duffcopy
+		// arg0 = address of dst memory (in X11, changed as side effect)
+		// arg1 = address of src memory (in X10, changed as side effect)
+		// arg2 = mem
+		// auxint = offset into duffcopy code to start executing
+		// X1 (link register) changed because of function call
+		// returns mem
+		{
+			name:      "DUFFCOPY",
+			aux:       "Int64",
+			argLength: 3,
+			reg: regInfo{
+				inputs:   []regMask{regNamed["X11"], regNamed["X10"]},
+				clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
+			},
+			typ:            "Mem",
+			faultOnNilArg0: true,
+			faultOnNilArg1: true,
+		},
+
 		// Generic moves and zeros
 
 		// general unaligned zeroing
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index e564f638d3..2d6f091a4e 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -198,6 +198,9 @@
 		(RXSBG <typ.UInt32> {s390x.NewRotateParams(59, 60, 3)} (MOVDconst [3<<3]) ptr))
 	mem)
 
+(AtomicAnd32 ...) => (LAN ...)
+(AtomicOr32 ...)  => (LAO ...)
+
 // Lowering extension
 // Note: we always extend to 64 bits even though some ops don't need that many result bits.
 (SignExt8to(16|32|64) ...) => (MOVBreg ...)
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 417b33cf91..728cfb5508 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -547,8 +547,10 @@ func init() {
 		// Atomic bitwise operations.
 		// Note: 'floor' operations round the pointer down to the nearest word boundary
 		// which reflects how they are used in the runtime.
-		{name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
+		{name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 &= arg1. arg2 = mem.
 		{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
+		{name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true},         // *arg0 |= arg1. arg2 = mem.
+		{name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
 
 		// Compare and swap.
 		// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
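
LAN and LAO are interlocked-access instructions, so the new AtomicAnd32/AtomicOr32 ops lower to a single instruction on s390x. For intuition, the contract these ops promise is that of a compare-and-swap loop; a portable sketch using sync/atomic (illustrative only, not the s390x lowering):

    package p

    import "sync/atomic"

    // atomicAnd32 performs *p &= v atomically, the contract of the new
    // AtomicAnd32 op (one LAN instruction on s390x).
    func atomicAnd32(p *uint32, v uint32) {
    	for {
    		old := atomic.LoadUint32(p)
    		if atomic.CompareAndSwapUint32(p, old, old&v) {
    			return
    		}
    	}
    }

    // atomicOr32 performs *p |= v atomically (LAO on s390x).
    func atomicOr32(p *uint32, v uint32) {
    	for {
    		old := atomic.LoadUint32(p)
    		if atomic.CompareAndSwapUint32(p, old, old|v) {
    			return
    		}
    	}
    }
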
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 4f9e863f90..07607960fa 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -9,7 +9,6 @@
 (Int64Hi (Int64Make hi _)) => hi
 (Int64Lo (Int64Make _ lo)) => lo
 
-
 (Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
 	(Int64Make
 		(Load <typ.Int32> (OffPtr <typ.Int32Ptr> [4] ptr) mem)
@@ -143,6 +142,10 @@
 (Trunc64to32 (Int64Make _ lo)) => lo
 (Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo)
 (Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo)
+// Most general
+(Trunc64to32 x) => (Int64Lo x)
+(Trunc64to16 x) => (Trunc32to16 (Int64Lo x))
+(Trunc64to8 x) => (Trunc32to8 (Int64Lo x))
 
 (Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
 (Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x)
@@ -175,156 +178,174 @@
 // turn x64 non-constant shifts to x32 shifts
 // if high 32-bit of the shift is nonzero, make a huge shift
 (Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 
 (Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 
 (Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 
 (Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
 (Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
-	(Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+	(Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+
+// Most general
+(Lsh64x64 x y) => (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64x64 x y) => (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh32x64 x y) => (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32x64 x y) => (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh16x64 x y) => (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16x64 x y) => (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh8x64 x y) => (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8x64 x y) => (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+
+// Clean up constants a little
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c == 0 => y
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c != 0 => (Const32 <typ.UInt32> [-1])
 
 // 64x left shift
 // result.hi = hi<<s | lo>>(32-s) | lo<<(s-32)
 // >> is unsigned, large shifts result 0
 // result.lo = lo<<s
-(Lsh64x32 (Int64Make hi lo) s) =>
+(Lsh64x32 x s) =>
 	(Int64Make
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Lsh32x32 <typ.UInt32> hi s)
+				(Lsh32x32 <typ.UInt32> (Int64Hi x) s)
 				(Rsh32Ux32 <typ.UInt32>
-					lo
+					(Int64Lo x)
 					(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
 			(Lsh32x32 <typ.UInt32>
-				lo
+				(Int64Lo x)
 				(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))))
-		(Lsh32x32 <typ.UInt32> lo s))
+		(Lsh32x32 <typ.UInt32> (Int64Lo x) s))
-(Lsh64x16 (Int64Make hi lo) s) =>
+(Lsh64x16 x s) =>
 	(Int64Make
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Lsh32x16 <typ.UInt32> hi s)
+				(Lsh32x16 <typ.UInt32> (Int64Hi x) s)
 				(Rsh32Ux16 <typ.UInt32>
-					lo
+					(Int64Lo x)
 					(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
 			(Lsh32x16 <typ.UInt32>
-				lo
+				(Int64Lo x)
 				(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))))
-		(Lsh32x16 <typ.UInt32> lo s))
+		(Lsh32x16 <typ.UInt32> (Int64Lo x) s))
-(Lsh64x8 (Int64Make hi lo) s) =>
+(Lsh64x8 x s) =>
 	(Int64Make
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Lsh32x8 <typ.UInt32> hi s)
+				(Lsh32x8 <typ.UInt32> (Int64Hi x) s)
 				(Rsh32Ux8 <typ.UInt32>
-					lo
+					(Int64Lo x)
 					(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
 			(Lsh32x8 <typ.UInt32>
-				lo
+				(Int64Lo x)
 				(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))))
-		(Lsh32x8 <typ.UInt32> lo s))
+		(Lsh32x8 <typ.UInt32> (Int64Lo x) s))
 
 // 64x unsigned right shift
 // result.hi = hi>>s
 // result.lo = lo>>s | hi<<(32-s) | hi>>(s-32)
 // >> is unsigned, large shifts result 0
-(Rsh64Ux32 (Int64Make hi lo) s) =>
+(Rsh64Ux32 x s) =>
 	(Int64Make
-		(Rsh32Ux32 <typ.UInt32> hi s)
+		(Rsh32Ux32 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux32 <typ.UInt32> lo s)
+				(Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x32 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
 			(Rsh32Ux32 <typ.UInt32>
-				hi
+				(Int64Hi x)
 				(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) =>
+(Rsh64Ux16 x s) =>
 	(Int64Make
-		(Rsh32Ux16 <typ.UInt32> hi s)
+		(Rsh32Ux16 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux16 <typ.UInt32> lo s)
+				(Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x16 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
 			(Rsh32Ux16 <typ.UInt32>
-				hi
+				(Int64Hi x)
 				(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) =>
+(Rsh64Ux8 x s) =>
 	(Int64Make
-		(Rsh32Ux8 <typ.UInt32> hi s)
+		(Rsh32Ux8 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux8 <typ.UInt32> lo s)
+				(Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x8 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
 			(Rsh32Ux8 <typ.UInt32>
-				hi
+				(Int64Hi x)
 				(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
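
The left-shift decomposition above is easier to follow as ordinary code. A direct Go transcription (hypothetical function, not from the patch), relying on Go's guarantee that shifting a uint32 by 32 or more yields 0, which mirrors the ">> is unsigned, large shifts result 0" comment in the rules:

    package p

    // lsh64 computes (hi:lo) << s on a 32-bit machine, following
    //   result.hi = hi<<s | lo>>(32-s) | lo<<(s-32)
    //   result.lo = lo<<s
    // For s < 32 the lo<<(s-32) term is 0 (the unsigned count wraps
    // to a value >= 32); for s >= 32 the first two terms vanish instead.
    func lsh64(hi, lo, s uint32) (rhi, rlo uint32) {
    	rhi = hi<<s | lo>>(32-s) | lo<<(s-32)
    	rlo = lo << s
    	return
    }
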
 // 64x signed right shift
 // result.hi = hi>>s
 // result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5)
 // hi>>(s-32) is signed, large shifts result 0/-1
-(Rsh64x32 (Int64Make hi lo) s) =>
+(Rsh64x32 x s) =>
 	(Int64Make
-		(Rsh32x32 <typ.UInt32> hi s)
+		(Rsh32x32 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux32 <typ.UInt32> lo s)
+				(Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x32 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
 			(And32 <typ.UInt32>
 				(Rsh32x32 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))
 				(Zeromask
 					(Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) =>
+(Rsh64x16 x s) =>
 	(Int64Make
-		(Rsh32x16 <typ.UInt32> hi s)
+		(Rsh32x16 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux16 <typ.UInt32> lo s)
+				(Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x16 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
 			(And32 <typ.UInt32>
 				(Rsh32x16 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))
 				(Zeromask
 					(ZeroExt16to32
 						(Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) =>
+(Rsh64x8 x s) =>
 	(Int64Make
-		(Rsh32x8 <typ.UInt32> hi s)
+		(Rsh32x8 <typ.UInt32> (Int64Hi x) s)
 		(Or32 <typ.UInt32>
 			(Or32 <typ.UInt32>
-				(Rsh32Ux8 <typ.UInt32> lo s)
+				(Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
 				(Lsh32x8 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
 			(And32 <typ.UInt32>
 				(Rsh32x8 <typ.UInt32>
-					hi
+					(Int64Hi x)
 					(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))
 				(Zeromask
 					(ZeroExt8to32
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 39f8cc8889..4351ef5bdd 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1961,6 +1961,31 @@
 	&& warnRule(fe.Debug_checknil(), v, "removed nil check")
 	=> (Invalid)
 
+// for late-expanded calls
+(Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
+	&& isSameCall(call.Aux, "runtime.newobject")
+	=> mem
+
+(Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call))
+	&& isConstZero(x)
+	&& isSameCall(call.Aux, "runtime.newobject")
+	=> mem
+
+(Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call))
+	&& isConstZero(x)
+	&& isSameCall(call.Aux, "runtime.newobject")
+	=> mem
+
+(NilCheck (SelectN [0] call:(StaticLECall _ _)) (SelectN [1] call))
+	&& isSameCall(call.Aux, "runtime.newobject")
+	&& warnRule(fe.Debug_checknil(), v, "removed nil check")
+	=> (Invalid)
+
+(NilCheck (OffPtr (SelectN [0] call:(StaticLECall _ _))) (SelectN [1] call))
+	&& isSameCall(call.Aux, "runtime.newobject")
+	&& warnRule(fe.Debug_checknil(), v, "removed nil check")
+	=> (Invalid)
+
 // Evaluate constant address comparisons.
 (EqPtr x x) => (ConstBool [true])
 (NeqPtr x x) => (ConstBool [false])
@@ -2017,6 +2042,17 @@
 	&& clobber(s1, s2, s3)
 	=> (Move {t.Elem()} [int64(sz)] dst src mem)
 
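The newobject rules above encode the fact that runtime.newobject returns zeroed memory, now matched in late-expanded (StaticLECall/SelectN) form as well: a Zero of the fresh allocation or a store of a constant zero into it is dropped, and a nil check of the result is provably dead. A source-level sketch of the pattern they target (hypothetical type, not from the patch):

    package p

    type pair struct{ a, b int64 }

    func fresh() *pair {
    	p := new(pair) // runtime.newobject: memory is already zero
    	p.a = 0        // matches the Store/OffPtr rules; the store is removed
    	return p       // any nil check on p is dropped ("removed nil check")
    }
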
+// Inline small or disjoint runtime.memmove calls with constant length.
+// See the comment in op Move in genericOps.go for discussion of the type.
+(SelectN [0] call:(StaticLECall {sym} dst src (Const(64|32) [sz]) mem))
+	&& sz >= 0
+	&& call.Uses == 1 // this will exclude all calls with results
+	&& isSameCall(sym, "runtime.memmove")
+	&& dst.Type.IsPtr() // avoids TUINTPTR, see issue 30061
+	&& isInlinableMemmove(dst, src, int64(sz), config)
+	&& clobber(call)
+	=> (Move {dst.Type.Elem()} [int64(sz)] dst src mem)
+
 // De-virtualize interface calls into static calls.
 // Note that (ITab (IMake)) doesn't get
 // rewritten until after the first opt pass,
@@ -2024,6 +2060,13 @@
 (InterCall [argsize] {auxCall} (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) && devirt(v, auxCall, itab, off) != nil =>
 	(StaticCall [int32(argsize)] {devirt(v, auxCall, itab, off)} mem)
 
+// De-virtualize late-expanded interface calls into late-expanded static calls.
+// Note that (ITab (IMake)) doesn't get rewritten until after the first opt pass,
+// so this rule should trigger reliably.
+// devirtLECall removes the first argument, adds the devirtualized symbol to the AuxCall, and changes the opcode
+(InterLECall [argsize] {auxCall} (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) ___) && devirtLESym(v, auxCall, itab, off) !=
+	nil => devirtLECall(v, devirtLESym(v, auxCall, itab, off))
+
 // Move and Zero optimizations.
 // Move source and destination may overlap.
@@ -2404,6 +2447,7 @@
 			(Store {t5} (OffPtr <tt5> [o5] dst) d4
 				(Zero {t1} [n] dst mem)))))
 
+// TODO this does not fire before call expansion; is that acceptable?
 (StaticCall {sym} x) && needRaceCleanup(sym, v) => x
 
 // Collapse moving A -> B -> C into just A -> C.
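
In source terms, the memmove rule targets code like the following sketch (illustrative; whether it actually fires depends on isInlinableMemmove, the target, and the config):

    package p

    // Copying a small, constant number of bytes between byte slices
    // lowers to a runtime.memmove call; the new SelectN rule rewrites
    // that late-expanded call into a single (Move [8] dst src mem).
    func copy8(dst, src []byte) {
    	copy(dst[:8], src[:8])
    }
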
+ {name: "StaticLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded static call function aux.(*ssa.AuxCall.Fn). arg0..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. + {name: "InterLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded interface call. arg0=code pointer, arg1..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem. // Conversions: signed extensions, zero (unsigned) extensions, truncations {name: "SignExt8to16", argLength: 1, typ: "Int16"}, @@ -539,20 +541,22 @@ var genericOps = []opData{ {name: "SelectN", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the auxint'th member. {name: "SelectNAddr", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the address of auxint'th member. Used for un-SSA-able result types. - // Atomic operations used for semantically inlining runtime/internal/atomic. - // Atomic loads return a new memory so that the loads are properly ordered - // with respect to other loads and stores. - // TODO: use for sync/atomic at some point. + // Atomic operations used for semantically inlining sync/atomic and + // runtime/internal/atomic. Atomic loads return a new memory so that + // the loads are properly ordered with respect to other loads and + // stores. {name: "AtomicLoad8", argLength: 2, typ: "(UInt8,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory. {name: "AtomicLoadAcq32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory. + {name: "AtomicLoadAcq64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory. {name: "AtomicStore8", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStore32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStore64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory. {name: "AtomicStoreRel32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory. + {name: "AtomicStoreRel64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory. {name: "AtomicExchange32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicExchange64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory. {name: "AtomicAdd32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory. 
@@ -561,7 +565,9 @@ var genericOps = []opData{
 	{name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true},    // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
 	{name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory.
 	{name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true},  // *arg0 &= arg1. arg2=memory. Returns memory.
+	{name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
 	{name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true},   // *arg0 |= arg1. arg2=memory. Returns memory.
+	{name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true},  // *arg0 |= arg1. arg2=memory. Returns memory.
 
 	// Atomic operation variants
 	// These variants have the same semantics as above atomic operations.
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index be51a7c5f8..120ccbbdb3 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -35,8 +35,7 @@ import (
 )
 
 // rule syntax:
-//    sexpr [&& extra conditions] -> [@block] sexpr  (untyped)
-//    sexpr [&& extra conditions] => [@block] sexpr  (typed)
+//    sexpr [&& extra conditions] => [@block] sexpr
 //
 // sexpr are s-expressions (lisp-like parenthesized groupings)
 // sexpr ::= [variable:](opcode sexpr*)
@@ -50,8 +49,12 @@ import (
 // variable ::= some token
 // opcode ::= one of the opcodes from the *Ops.go files
 
+// special rules: trailing ellipsis "..." (in the outermost sexpr?) must match on both sides of a rule.
+//                trailing three underscore "___" in the outermost match sexpr indicate the presence of
+//                extra ignored args that need not appear in the replacement
+
 // extra conditions is just a chunk of Go that evaluates to a boolean. It may use
-// variables declared in the matching sexpr. The variable "v" is predefined to be
+// variables declared in the matching tsexpr. The variable "v" is predefined to be
 // the value matched by the entire rule.
 
 // If multiple rules match, the first one in file order is selected.
@@ -75,14 +78,8 @@ func normalizeSpaces(s string) string {
 }
 
 // parse returns the matching part of the rule, additional conditions, and the result.
-// parse also reports whether the generated code should use strongly typed aux and auxint fields.
-func (r Rule) parse() (match, cond, result string, typed bool) {
-	arrow := "->"
-	if strings.Contains(r.Rule, "=>") {
-		arrow = "=>"
-		typed = true
-	}
-	s := strings.Split(r.Rule, arrow)
+func (r Rule) parse() (match, cond, result string) {
+	s := strings.Split(r.Rule, "=>")
 	match = normalizeSpaces(s[0])
 	result = normalizeSpaces(s[1])
 	cond = ""
@@ -90,7 +87,7 @@ func (r Rule) parse() (match, cond, result string, typed bool) {
 		cond = normalizeSpaces(match[i+2:])
 		match = normalizeSpaces(match[:i])
 	}
-	return match, cond, result, typed
+	return match, cond, result
 }
 
 func genRules(arch arch) { genRulesSuffix(arch, "") }
@@ -116,7 +113,7 @@ func genRulesSuffix(arch arch, suff string) {
 	scanner := bufio.NewScanner(text)
 	rule := ""
 	var lineno int
-	var ruleLineno int // line number of "->" or "=>"
+	var ruleLineno int // line number of "=>"
 	for scanner.Scan() {
 		lineno++
 		line := scanner.Text()
@@ -130,13 +127,13 @@ func genRulesSuffix(arch arch, suff string) {
 		if rule == "" {
 			continue
 		}
-		if !strings.Contains(rule, "->") && !strings.Contains(rule, "=>") {
+		if !strings.Contains(rule, "=>") {
 			continue
 		}
 		if ruleLineno == 0 {
 			ruleLineno = lineno
 		}
-		if strings.HasSuffix(rule, "->") || strings.HasSuffix(rule, "=>") {
+		if strings.HasSuffix(rule, "=>") {
 			continue // continue on the next line
 		}
 		if n := balance(rule); n > 0 {
@@ -153,7 +150,7 @@ func genRulesSuffix(arch arch, suff string) {
 			continue
 		}
 		// Do fancier value op matching.
-		match, _, _, _ := r.parse()
+		match, _, _ := r.parse()
 		op, oparch, _, _, _, _ := parseValue(match, arch, loc)
 		opname := fmt.Sprintf("Op%s%s", oparch, op.name)
 		oprules[opname] = append(oprules[opname], r)
@@ -227,7 +224,7 @@ func genRulesSuffix(arch arch, suff string) {
 				log.Fatalf("unconditional rule %s is followed by other rules", rr.Match)
 			}
 			rr = &RuleRewrite{Loc: rule.Loc}
-			rr.Match, rr.Cond, rr.Result, rr.Typed = rule.parse()
+			rr.Match, rr.Cond, rr.Result = rule.parse()
 			pos, _ := genMatch(rr, arch, rr.Match, fn.ArgLen >= 0)
 			if pos == "" {
 				pos = "v.Pos"
@@ -786,7 +783,6 @@ type (
 		Alloc        int    // for unique var names
 		Loc          string // file name & line number of the original rule
 		CommuteDepth int    // used to track depth of commute loops
-		Typed        bool   // aux and auxint fields should be strongly typed
 	}
 	Declare struct {
 		Name  string
@@ -840,7 +836,7 @@ func breakf(format string, a ...interface{}) *CondBreak {
 
 func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
 	rr := &RuleRewrite{Loc: rule.Loc}
-	rr.Match, rr.Cond, rr.Result, rr.Typed = rule.parse()
+	rr.Match, rr.Cond, rr.Result = rule.parse()
 	_, _, auxint, aux, s := extract(rr.Match) // remove parens, then split
 
 	// check match of control values
@@ -884,15 +880,6 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
 		if e.name == "" {
 			continue
 		}
-		if !rr.Typed {
-			if !token.IsIdentifier(e.name) || rr.declared(e.name) {
-				// code or variable
-				rr.add(breakf("b.%s != %s", e.field, e.name))
-			} else {
-				rr.add(declf(e.name, "b.%s", e.field))
-			}
-			continue
-		}
 		if e.dclType == "" {
 			log.Fatalf("op %s has no declared type for %s", data.name, e.field)
 		}
@@ -961,20 +948,12 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
 	}
 
 	if auxint != "" {
-		if rr.Typed {
-			// Make sure auxint value has the right type.
-			rr.add(stmtf("b.AuxInt = %sToAuxInt(%s)", unTitle(outdata.auxIntType()), auxint))
-		} else {
-			rr.add(stmtf("b.AuxInt = %s", auxint))
-		}
+		// Make sure auxint value has the right type.
+ rr.add(stmtf("b.AuxInt = %sToAuxInt(%s)", unTitle(outdata.auxIntType()), auxint)) } if aux != "" { - if rr.Typed { - // Make sure aux value has the right type. - rr.add(stmtf("b.Aux = %sToAux(%s)", unTitle(outdata.auxType()), aux)) - } else { - rr.add(stmtf("b.Aux = %s", aux)) - } + // Make sure aux value has the right type. + rr.add(stmtf("b.Aux = %sToAux(%s)", unTitle(outdata.auxType()), aux)) } succChanged := false @@ -1019,6 +998,19 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int, pos = v + ".Pos" } + // If the last argument is ___, it means "don't care about trailing arguments, really" + // The likely/intended use is for rewrites that are too tricky to express in the existing pattern language + // Do a length check early because long patterns fed short (ultimately not-matching) inputs will + // do an indexing error in pattern-matching. + if op.argLength == -1 { + l := len(args) + if l == 0 || args[l-1] != "___" { + rr.add(breakf("len(%s.Args) != %d", v, l)) + } else if l > 1 && args[l-1] == "___" { + rr.add(breakf("len(%s.Args) < %d", v, l-1)) + } + } + for _, e := range []struct { name, field, dclType string }{ @@ -1029,15 +1021,6 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int, if e.name == "" { continue } - if !rr.Typed { - if !token.IsIdentifier(e.name) || rr.declared(e.name) { - // code or variable - rr.add(breakf("%s.%s != %s", v, e.field, e.name)) - } else { - rr.add(declf(e.name, "%s.%s", v, e.field)) - } - continue - } if e.dclType == "" { log.Fatalf("op %s has no declared type for %s", op.name, e.field) @@ -1159,9 +1142,6 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int, } } - if op.argLength == -1 { - rr.add(breakf("len(%s.Args) != %d", v, len(args))) - } return pos, checkOp } @@ -1230,20 +1210,12 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s } if auxint != "" { - if rr.Typed { - // Make sure auxint value has the right type. - rr.add(stmtf("%s.AuxInt = %sToAuxInt(%s)", v, unTitle(op.auxIntType()), auxint)) - } else { - rr.add(stmtf("%s.AuxInt = %s", v, auxint)) - } + // Make sure auxint value has the right type. + rr.add(stmtf("%s.AuxInt = %sToAuxInt(%s)", v, unTitle(op.auxIntType()), auxint)) } if aux != "" { - if rr.Typed { - // Make sure aux value has the right type. - rr.add(stmtf("%s.Aux = %sToAux(%s)", v, unTitle(op.auxType()), aux)) - } else { - rr.add(stmtf("%s.Aux = %s", v, aux)) - } + // Make sure aux value has the right type. + rr.add(stmtf("%s.Aux = %sToAux(%s)", v, unTitle(op.auxType()), aux)) } all := new(strings.Builder) for i, arg := range args { @@ -1524,7 +1496,7 @@ func excludeFromExpansion(s string, idx []int) bool { return true } right := s[idx[1]:] - if strings.Contains(left, "&&") && (strings.Contains(right, "->") || strings.Contains(right, "=>")) { + if strings.Contains(left, "&&") && strings.Contains(right, "=>") { // Inside && conditions. 
 		return true
 	}
 
@@ -1626,7 +1598,6 @@ func normalizeWhitespace(x string) string {
 	x = strings.Replace(x, " )", ")", -1)
 	x = strings.Replace(x, "[ ", "[", -1)
 	x = strings.Replace(x, " ]", "]", -1)
-	x = strings.Replace(x, ")->", ") ->", -1)
 	x = strings.Replace(x, ")=>", ") =>", -1)
 	return x
 }
@@ -1683,7 +1654,7 @@ func parseEllipsisRules(rules []Rule, arch arch) (newop string, ok bool) {
 		return "", false
 	}
 	rule := rules[0]
-	match, cond, result, _ := rule.parse()
+	match, cond, result := rule.parse()
 	if cond != "" || !isEllipsisValue(match) || !isEllipsisValue(result) {
 		if strings.Contains(rule.Rule, "...") {
 			log.Fatalf("%s: found ellipsis in non-ellipsis rule", rule.Loc)
@@ -1708,7 +1679,7 @@ func isEllipsisValue(s string) bool {
 }
 
 func checkEllipsisRuleCandidate(rule Rule, arch arch) {
-	match, cond, result, _ := rule.parse()
+	match, cond, result := rule.parse()
 	if cond != "" {
 		return
 	}
@@ -1718,7 +1689,7 @@ func checkEllipsisRuleCandidate(rule Rule, arch arch) {
 	var usingCopy string
 	var eop opData
 	if result[0] != '(' {
-		// Check for (Foo x) -> x, which can be converted to (Foo ...) -> (Copy ...).
+		// Check for (Foo x) => x, which can be converted to (Foo ...) => (Copy ...).
 		args2 = []string{result}
 		usingCopy = " using Copy"
 	} else {
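
With the untyped "->" form gone, parsing a rule reduces to a split on "=>" plus peeling off an optional "&&" condition. A condensed, standalone version of the parse shown above (a sketch; strings.TrimSpace stands in for normalizeSpaces):

    package main

    import (
    	"fmt"
    	"strings"
    )

    // parseRule mirrors the simplified (r Rule) parse above.
    func parseRule(rule string) (match, cond, result string) {
    	s := strings.Split(rule, "=>")
    	match = strings.TrimSpace(s[0])
    	result = strings.TrimSpace(s[1])
    	if i := strings.Index(match, "&&"); i >= 0 {
    		cond = strings.TrimSpace(match[i+2:])
    		match = strings.TrimSpace(match[:i])
    	}
    	return match, cond, result
    }

    func main() {
    	m, c, r := parseRule(`(MOVDreg x) && x.Uses == 1 => (MOVDnop x)`)
    	fmt.Printf("match=%q cond=%q result=%q\n", m, c, r)
    	// match="(MOVDreg x)" cond="x.Uses == 1" result="(MOVDnop x)"
    }
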
