path: root/src/cmd/compile/internal/ssa/gen
author    Cherry Zhang <cherryyz@google.com>  2020-10-28 09:12:20 -0400
committer Cherry Zhang <cherryyz@google.com>  2020-10-28 09:12:20 -0400
commit    a16e30d162c1c7408db7821e7b9513cefa09c6ca (patch)
tree      af752ba9ba44c547df39bb0af9bff79f610ba9d5 /src/cmd/compile/internal/ssa/gen
parent    91e4d2d57bc341dd82c98247117114c851380aef (diff)
parent    cf6cfba4d5358404dd890f6025e573a4b2156543 (diff)
[dev.link] all: merge branch 'master' into dev.link

Clean merge.

Change-Id: Ia7b2808bc649790198d34c226a61d9e569084dc5
Diffstat (limited to 'src/cmd/compile/internal/ssa/gen')
-rw-r--r--  src/cmd/compile/internal/ssa/gen/386.rules        12
-rw-r--r--  src/cmd/compile/internal/ssa/gen/386Ops.go        14
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64.rules      14
-rw-r--r--  src/cmd/compile/internal/ssa/gen/AMD64Ops.go       2
-rw-r--r--  src/cmd/compile/internal/ssa/gen/ARM.rules        34
-rw-r--r--  src/cmd/compile/internal/ssa/gen/ARM64.rules     182
-rw-r--r--  src/cmd/compile/internal/ssa/gen/ARM64Ops.go       6
-rw-r--r--  src/cmd/compile/internal/ssa/gen/MIPS.rules       11
-rw-r--r--  src/cmd/compile/internal/ssa/gen/MIPS64.rules      6
-rw-r--r--  src/cmd/compile/internal/ssa/gen/PPC64.rules      46
-rw-r--r--  src/cmd/compile/internal/ssa/gen/PPC64Ops.go      23
-rw-r--r--  src/cmd/compile/internal/ssa/gen/RISCV64.rules   127
-rw-r--r--  src/cmd/compile/internal/ssa/gen/RISCV64Ops.go    56
-rw-r--r--  src/cmd/compile/internal/ssa/gen/S390X.rules       3
-rw-r--r--  src/cmd/compile/internal/ssa/gen/S390XOps.go       4
-rw-r--r--  src/cmd/compile/internal/ssa/gen/dec64.rules     137
-rw-r--r--  src/cmd/compile/internal/ssa/gen/generic.rules    44
-rw-r--r--  src/cmd/compile/internal/ssa/gen/genericOps.go    22
-rw-r--r--  src/cmd/compile/internal/ssa/gen/rulegen.go      109
19 files changed, 521 insertions, 331 deletions
diff --git a/src/cmd/compile/internal/ssa/gen/386.rules b/src/cmd/compile/internal/ssa/gen/386.rules
index 4a8244eb27..4e6cc8c692 100644
--- a/src/cmd/compile/internal/ssa/gen/386.rules
+++ b/src/cmd/compile/internal/ssa/gen/386.rules
@@ -38,10 +38,8 @@
(Xor(32|16|8) ...) => (XORL ...)
(Neg(32|16|8) ...) => (NEGL ...)
-(Neg32F x) && !config.use387 => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
-(Neg64F x) && !config.use387 => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
-(Neg32F x) && config.use387 => (FCHS x)
-(Neg64F x) && config.use387 => (FCHS x)
+(Neg32F x) => (PXOR x (MOVSSconst <typ.Float32> [float32(math.Copysign(0, -1))]))
+(Neg64F x) => (PXOR x (MOVSDconst <typ.Float64> [math.Copysign(0, -1)]))
(Com(32|16|8) ...) => (NOTL ...)
@@ -312,7 +310,7 @@
(Const32 ...) => (MOVLconst ...)
(Const(32|64)F ...) => (MOVS(S|D)const ...)
(ConstNil) => (MOVLconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
// Lowering calls
(StaticCall ...) => (CALLstatic ...)
@@ -670,8 +668,8 @@
// Merge load/store to op
((ADD|AND|OR|XOR|SUB|MUL)L x l:(MOVLload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|AND|OR|XOR|SUB|MUL)Lload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
-((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && !config.use387 && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SD x l:(MOVSDload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SDload x [off] {sym} ptr mem)
+((ADD|SUB|MUL|DIV)SS x l:(MOVSSload [off] {sym} ptr mem)) && canMergeLoadClobber(v, l, x) && clobber(l) => ((ADD|SUB|MUL|DIV)SSload x [off] {sym} ptr mem)
(MOVLstore {sym} [off] ptr y:((ADD|AND|OR|XOR)Lload x [off] {sym} ptr mem) mem) && y.Uses==1 && clobber(y) => ((ADD|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
(MOVLstore {sym} [off] ptr y:((ADD|SUB|AND|OR|XOR)L l:(MOVLload [off] {sym} ptr mem) x) mem) && y.Uses==1 && l.Uses==1 && clobber(y, l) =>
((ADD|SUB|AND|OR|XOR)Lmodify [off] {sym} ptr x mem)
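
[Note] The Neg32F/Neg64F rules kept above work because math.Copysign(0, -1) is -0.0, a constant whose only set bit is the IEEE-754 sign bit, so the PXOR flips just the sign. An illustrative Go sketch of that identity (negateBits is our name, not a compiler helper):

package main

import (
	"fmt"
	"math"
)

// negateBits flips only the IEEE-754 sign bit -- exactly what XORing
// with the MOVSDconst value math.Copysign(0, -1) (-0.0) does above.
func negateBits(x float64) float64 {
	signBit := math.Float64bits(math.Copysign(0, -1)) // 1 << 63
	return math.Float64frombits(math.Float64bits(x) ^ signBit)
}

func main() {
	fmt.Println(negateBits(3.5), negateBits(-2)) // -3.5 2
}
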
diff --git a/src/cmd/compile/internal/ssa/gen/386Ops.go b/src/cmd/compile/internal/ssa/gen/386Ops.go
index ddabde7d3d..737b99c371 100644
--- a/src/cmd/compile/internal/ssa/gen/386Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/386Ops.go
@@ -51,17 +51,6 @@ var regNames386 = []string{
"SB",
}
-// Notes on 387 support.
-// - The 387 has a weird stack-register setup for floating-point registers.
-// We use these registers when SSE registers are not available (when GO386=387).
-// - We use the same register names (X0-X7) but they refer to the 387
-// floating-point registers. That way, most of the SSA backend is unchanged.
-// - The instruction generation pass maintains an SSE->387 register mapping.
-// This mapping is updated whenever the FP stack is pushed or popped so that
-// we can always find a given SSE register even when the TOS pointer has changed.
-// - To facilitate the mapping from SSE to 387, we enforce that
-// every basic block starts and ends with an empty floating-point stack.
-
func init() {
// Make map from reg names to reg integers.
if len(regNames386) > 64 {
@@ -552,9 +541,6 @@ func init() {
{name: "FlagGT_UGT"}, // signed > and unsigned <
{name: "FlagGT_ULT"}, // signed > and unsigned >
- // Special op for -x on 387
- {name: "FCHS", argLength: 1, reg: fp11},
-
// Special ops for PIC floating-point constants.
// MOVSXconst1 loads the address of the constant-pool entry into a register.
// MOVSXconst2 loads the constant from that address.
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64.rules b/src/cmd/compile/internal/ssa/gen/AMD64.rules
index 408678f054..934e7dfdb6 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64.rules
+++ b/src/cmd/compile/internal/ssa/gen/AMD64.rules
@@ -401,7 +401,7 @@
(Const32F ...) => (MOVSSconst ...)
(Const64F ...) => (MOVSDconst ...)
(ConstNil ) => (MOVQconst [0])
-(ConstBool [c]) => (MOVLconst [int32(b2i(c))])
+(ConstBool [c]) => (MOVLconst [b2i32(c)])
// Lowering calls
(StaticCall ...) => (CALLstatic ...)
@@ -530,8 +530,10 @@
(AtomicCompareAndSwap64 ptr old new_ mem) => (CMPXCHGQlock ptr old new_ mem)
// Atomic memory updates.
-(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
-(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicAnd8 ptr val mem) => (ANDBlock ptr val mem)
+(AtomicAnd32 ptr val mem) => (ANDLlock ptr val mem)
+(AtomicOr8 ptr val mem) => (ORBlock ptr val mem)
+(AtomicOr32 ptr val mem) => (ORLlock ptr val mem)
// Write barrier.
(WB ...) => (LoweredWB ...)
@@ -957,7 +959,7 @@
(MUL(Q|L)const [73] x) => (LEA(Q|L)8 x (LEA(Q|L)8 <v.Type> x x))
(MUL(Q|L)const [81] x) => (LEA(Q|L)8 (LEA(Q|L)8 <v.Type> x x) (LEA(Q|L)8 <v.Type> x x))
-(MUL(Q|L)const [c] x) && isPowerOfTwo(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
+(MUL(Q|L)const [c] x) && isPowerOfTwo64(int64(c)+1) && c >= 15 => (SUB(Q|L) (SHL(Q|L)const <v.Type> [int8(log2(int64(c)+1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-1) && c >= 17 => (LEA(Q|L)1 (SHL(Q|L)const <v.Type> [int8(log32(c-1))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-2) && c >= 34 => (LEA(Q|L)2 (SHL(Q|L)const <v.Type> [int8(log32(c-2))] x) x)
(MUL(Q|L)const [c] x) && isPowerOfTwo32(c-4) && c >= 68 => (LEA(Q|L)4 (SHL(Q|L)const <v.Type> [int8(log32(c-4))] x) x)
@@ -1274,8 +1276,8 @@
(CMPQconst (ANDQconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
(CMPQconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
(CMPLconst (ANDLconst _ [m]) [n]) && 0 <= m && m < n => (FlagLT_ULT)
-(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= m && int16(m) < n => (FlagLT_ULT)
-(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= m && int8(m) < n => (FlagLT_ULT)
+(CMPWconst (ANDLconst _ [m]) [n]) && 0 <= int16(m) && int16(m) < n => (FlagLT_ULT)
+(CMPBconst (ANDLconst _ [m]) [n]) && 0 <= int8(m) && int8(m) < n => (FlagLT_ULT)
// TESTQ c c sets flags like CMPQ c 0.
(TESTQconst [c] (MOVQconst [d])) && int64(c) == d && c == 0 => (FlagEQ)
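
[Note] The CMPWconst/CMPBconst change above narrows the guard to the truncated width (int16/int8) because m is a 32-bit aux value whose high bits may be set. The b2i32 and isPowerOfTwo64 helpers the new guards call are one-liners in cmd/compile/internal/ssa/rewrite.go; this sketch matches how the rules use them:

package main

import "fmt"

// b2i32 maps a bool to 0/1 for the ConstBool lowering above.
func b2i32(b bool) int32 {
	if b {
		return 1
	}
	return 0
}

// isPowerOfTwo64 guards the multiply strength-reduction rules.
func isPowerOfTwo64(n int64) bool {
	return n > 0 && n&(n-1) == 0
}

func main() {
	fmt.Println(b2i32(true), isPowerOfTwo64(64), isPowerOfTwo64(65)) // 1 true false
}
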
diff --git a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
index 2df5016d59..de5372670b 100644
--- a/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/AMD64Ops.go
@@ -902,7 +902,9 @@ func init() {
// Atomic memory updates.
{name: "ANDBlock", argLength: 3, reg: gpstore, asm: "ANDB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
+ {name: "ANDLlock", argLength: 3, reg: gpstore, asm: "ANDL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) &= arg1
{name: "ORBlock", argLength: 3, reg: gpstore, asm: "ORB", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
+ {name: "ORLlock", argLength: 3, reg: gpstore, asm: "ORL", aux: "SymOff", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true, symEffect: "RdWr"}, // *(arg0+auxint+aux) |= arg1
}
var AMD64blocks = []blockData{
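
[Note] The new ANDLlock/ORLlock ops let AtomicAnd32/AtomicOr32 lower to a single LOCK-prefixed instruction. Semantically they match this portable sketch (an illustrative CAS loop, not the runtime's implementation):

package main

import (
	"fmt"
	"sync/atomic"
)

// atomicAnd32 does *addr &= mask atomically -- the semantics ANDLlock
// implements in one LOCK ANDL on AMD64.
func atomicAnd32(addr *uint32, mask uint32) {
	for {
		old := atomic.LoadUint32(addr)
		if atomic.CompareAndSwapUint32(addr, old, old&mask) {
			return
		}
	}
}

func main() {
	v := uint32(0b1111)
	atomicAnd32(&v, 0b0101)
	fmt.Printf("%04b\n", v) // 0101
}
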
diff --git a/src/cmd/compile/internal/ssa/gen/ARM.rules b/src/cmd/compile/internal/ssa/gen/ARM.rules
index 9490805f46..f48abcd202 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM.rules
@@ -169,10 +169,10 @@
(Rsh8x64 x (Const64 [c])) && uint64(c) >= 8 => (SRAconst (SLLconst <typ.UInt32> x [24]) [31])
// constants
-(Const(8|16|32) ...) -> (MOVWconst ...)
-(Const(32F|64F) ...) -> (MOV(F|D)const ...)
+(Const(8|16|32) [val]) => (MOVWconst [int32(val)])
+(Const(32|64)F [val]) => (MOV(F|D)const [float64(val)])
(ConstNil) => (MOVWconst [0])
-(ConstBool ...) -> (MOVWconst ...)
+(ConstBool [b]) => (MOVWconst [b2i32(b)])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
@@ -243,10 +243,10 @@
(Leq16U x y) => (LessEqualU (CMP (ZeroExt16to32 x) (ZeroExt16to32 y)))
(Leq32U x y) => (LessEqualU (CMP x y))
-(OffPtr [off] ptr:(SP)) -> (MOVWaddr [off] ptr)
-(OffPtr [off] ptr) -> (ADDconst [off] ptr)
+(OffPtr [off] ptr:(SP)) => (MOVWaddr [int32(off)] ptr)
+(OffPtr [off] ptr) => (ADDconst [int32(off)] ptr)
-(Addr ...) -> (MOVWaddr ...)
+(Addr {sym} base) => (MOVWaddr {sym} base)
(LocalAddr {sym} base _) => (MOVWaddr {sym} base)
// loads
@@ -1052,8 +1052,8 @@
(BICshiftRL x (MOVWconst [c]) [d]) => (BICconst x [int32(uint32(c)>>uint64(d))])
(BICshiftRA x (MOVWconst [c]) [d]) => (BICconst x [c>>uint64(d)])
(MVNshiftLL (MOVWconst [c]) [d]) => (MOVWconst [^(c<<uint64(d))])
-(MVNshiftRL (MOVWconst [c]) [d]) -> (MOVWconst [^int64(uint32(c)>>uint64(d))])
-(MVNshiftRA (MOVWconst [c]) [d]) -> (MOVWconst [^int64(int32(c)>>uint64(d))])
+(MVNshiftRL (MOVWconst [c]) [d]) => (MOVWconst [^int32(uint32(c)>>uint64(d))])
+(MVNshiftRA (MOVWconst [c]) [d]) => (MOVWconst [int32(c)>>uint64(d)])
(CMPshiftLL x (MOVWconst [c]) [d]) => (CMPconst x [c<<uint64(d)])
(CMPshiftRL x (MOVWconst [c]) [d]) => (CMPconst x [int32(uint32(c)>>uint64(d))])
(CMPshiftRA x (MOVWconst [c]) [d]) => (CMPconst x [c>>uint64(d)])
@@ -1190,12 +1190,12 @@
(MOVWstoreidx ptr (SRAconst idx [c]) val mem) => (MOVWstoreshiftRA ptr idx [c] val mem)
(MOVWstoreidx (SRAconst idx [c]) ptr val mem) => (MOVWstoreshiftRA ptr idx [c] val mem)
-(MOVWloadshiftLL ptr (MOVWconst [c]) [d] mem) -> (MOVWload [int64(uint32(c)<<uint64(d))] ptr mem)
-(MOVWloadshiftRL ptr (MOVWconst [c]) [d] mem) -> (MOVWload [int64(uint32(c)>>uint64(d))] ptr mem)
+(MOVWloadshiftLL ptr (MOVWconst [c]) [d] mem) => (MOVWload [int32(uint32(c)<<uint64(d))] ptr mem)
+(MOVWloadshiftRL ptr (MOVWconst [c]) [d] mem) => (MOVWload [int32(uint32(c)>>uint64(d))] ptr mem)
(MOVWloadshiftRA ptr (MOVWconst [c]) [d] mem) => (MOVWload [c>>uint64(d)] ptr mem)
-(MOVWstoreshiftLL ptr (MOVWconst [c]) [d] val mem) -> (MOVWstore [int64(uint32(c)<<uint64(d))] ptr val mem)
-(MOVWstoreshiftRL ptr (MOVWconst [c]) [d] val mem) -> (MOVWstore [int64(uint32(c)>>uint64(d))] ptr val mem)
+(MOVWstoreshiftLL ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [int32(uint32(c)<<uint64(d))] ptr val mem)
+(MOVWstoreshiftRL ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [int32(uint32(c)>>uint64(d))] ptr val mem)
(MOVWstoreshiftRA ptr (MOVWconst [c]) [d] val mem) => (MOVWstore [c>>uint64(d)] ptr val mem)
// generic simplifications
@@ -1263,8 +1263,8 @@
(SRLconst (SLLconst x [c]) [d]) && objabi.GOARM==7 && uint64(d)>=uint64(c) && uint64(d)<=31 => (BFXU [(d-c)|(32-d)<<8] x)
// comparison simplification
-(CMP x (RSBconst [0] y)) => (CMN x y)
-(CMN x (RSBconst [0] y)) => (CMP x y)
+((LT|LE|EQ|NE|GE|GT) (CMP x (RSBconst [0] y))) => ((LT|LE|EQ|NE|GE|GT) (CMN x y)) // sense of carry bit not preserved
+((LT|LE|EQ|NE|GE|GT) (CMN x (RSBconst [0] y))) => ((LT|LE|EQ|NE|GE|GT) (CMP x y)) // sense of carry bit not preserved
(EQ (CMPconst [0] l:(SUB x y)) yes no) && l.Uses==1 => (EQ (CMP x y) yes no)
(EQ (CMPconst [0] l:(MULS x y a)) yes no) && l.Uses==1 => (EQ (CMP a (MUL <x.Type> x y)) yes no)
(EQ (CMPconst [0] l:(SUBconst [c] x)) yes no) && l.Uses==1 => (EQ (CMPconst [c] x) yes no)
@@ -1470,6 +1470,6 @@
(GE (CMPconst [0] l:(XORshiftRLreg x y z)) yes no) && l.Uses==1 => (GE (TEQshiftRLreg x y z) yes no)
(GE (CMPconst [0] l:(XORshiftRAreg x y z)) yes no) && l.Uses==1 => (GE (TEQshiftRAreg x y z) yes no)
-(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read8(sym, off))])
-(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(read16(sym, off, config.ctxt.Arch.ByteOrder))])
-(MOVWload [off] {sym} (SB) _) && symIsRO(sym) -> (MOVWconst [int64(int32(read32(sym, off, config.ctxt.Arch.ByteOrder)))])
+(MOVBUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read8(sym, int64(off)))])
+(MOVHUload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read16(sym, int64(off), config.ctxt.Arch.ByteOrder))])
+(MOVWload [off] {sym} (SB) _) && symIsRO(sym) => (MOVWconst [int32(read32(sym, int64(off), config.ctxt.Arch.ByteOrder))])
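
[Note] The restricted CMP/CMN rewrite above ("sense of carry bit not preserved") reflects that x-(-y) and x+y agree on result bits but not on carry out, so only carry-insensitive conditions (LT|LE|EQ|NE|GE|GT) may consume the rewritten flags. A sketch, assuming ARM's convention that C after a compare/subtract is the inverted borrow:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// x - (-y) and x + y agree on result bits but not on the carry:
	// ARM sets C = NOT borrow after CMP, C = carry out after CMN.
	x, y := uint32(1), uint32(0)
	d, borrow := bits.Sub32(x, -y, 0) // CMP x, (RSBconst [0] y)
	s, carry := bits.Add32(x, y, 0)   // CMN x y
	cAfterCMP := 1 - borrow
	fmt.Println(d == s, cAfterCMP, carry) // true 1 0 -- same bits, different C
}
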
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64.rules b/src/cmd/compile/internal/ssa/gen/ARM64.rules
index c4a3532632..c50a8c7778 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64.rules
+++ b/src/cmd/compile/internal/ssa/gen/ARM64.rules
@@ -548,8 +548,10 @@
(AtomicCompareAndSwap(32|64) ...) => (LoweredAtomicCas(32|64) ...)
// Currently the updated value is not used, but we need a register to temporarily hold it.
-(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
-(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicAnd8 ptr val mem) => (Select1 (LoweredAtomicAnd8 ptr val mem))
+(AtomicAnd32 ptr val mem) => (Select1 (LoweredAtomicAnd32 ptr val mem))
+(AtomicOr8 ptr val mem) => (Select1 (LoweredAtomicOr8 ptr val mem))
+(AtomicOr32 ptr val mem) => (Select1 (LoweredAtomicOr32 ptr val mem))
(AtomicAdd(32|64)Variant ...) => (LoweredAtomicAdd(32|64)Variant ...)
@@ -1171,145 +1173,145 @@
(MUL x (MOVDconst [-1])) => (NEG x)
(MUL _ (MOVDconst [0])) => (MOVDconst [0])
(MUL x (MOVDconst [1])) => x
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MUL x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log2(c)] x)
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (ADDshiftLL x x [log2(c-1)])
+(MUL x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MUL x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MUL x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MUL x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MUL x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
(MULW x (MOVDconst [c])) && int32(c)==-1 => (NEG x)
(MULW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MULW x (MOVDconst [c])) && int32(c)==1 => x
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c) => (SLLconst [log2(c)] x)
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log2(c-1)])
-(MULW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
-(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
-(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
-(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
-(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c) => (SLLconst [log2(c)] x)
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (ADDshiftLL x x [log2(c-1)])
+(MULW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (ADDshiftLL (NEG <x.Type> x) x [log2(c+1)])
+(MULW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst [log2(c/3)] (ADDshiftLL <x.Type> x x [1]))
+(MULW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SLLconst [log2(c/5)] (ADDshiftLL <x.Type> x x [2]))
+(MULW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst [log2(c/7)] (ADDshiftLL <x.Type> (NEG <x.Type> x) x [3]))
+(MULW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SLLconst [log2(c/9)] (ADDshiftLL <x.Type> x x [3]))
// mneg by constant
(MNEG x (MOVDconst [-1])) => x
(MNEG _ (MOVDconst [0])) => (MOVDconst [0])
(MNEG x (MOVDconst [1])) => (NEG x)
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MNEG x (MOVDconst [c])) && isPowerOfTwo(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
-(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
-(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
-(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
-(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MNEG x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+(MNEG x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+(MNEG x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+(MNEG x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+(MNEG x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
(MNEGW x (MOVDconst [c])) && int32(c)==-1 => x
(MNEGW _ (MOVDconst [c])) && int32(c)==0 => (MOVDconst [0])
(MNEGW x (MOVDconst [c])) && int32(c)==1 => (NEG x)
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MNEGW x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
-(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
-(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
-(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
-(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c) => (NEG (SLLconst <x.Type> [log2(c)] x))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c) >= 3 => (NEG (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MNEGW x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c) >= 7 => (NEG (ADDshiftLL <x.Type> (NEG <x.Type> x) x [log2(c+1)]))
+(MNEGW x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SLLconst <x.Type> [log2(c/3)] (SUBshiftLL <x.Type> x x [2]))
+(MNEGW x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/5)] (ADDshiftLL <x.Type> x x [2])))
+(MNEGW x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SLLconst <x.Type> [log2(c/7)] (SUBshiftLL <x.Type> x x [3]))
+(MNEGW x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (NEG (SLLconst <x.Type> [log2(c/9)] (ADDshiftLL <x.Type> x x [3])))
(MADD a x (MOVDconst [-1])) => (SUB a x)
(MADD a _ (MOVDconst [0])) => a
(MADD a x (MOVDconst [1])) => (ADD a x)
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADD a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADD a (MOVDconst [-1]) x) => (SUB a x)
(MADD a (MOVDconst [0]) _) => a
(MADD a (MOVDconst [1]) x) => (ADD a x)
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADD a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADD a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADD a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADD a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADD a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADD a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADDW a x (MOVDconst [c])) && int32(c)==-1 => (SUB a x)
(MADDW a _ (MOVDconst [c])) && int32(c)==0 => a
(MADDW a x (MOVDconst [c])) && int32(c)==1 => (ADD a x)
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADDW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MADDW a (MOVDconst [c]) x) && int32(c)==-1 => (SUB a x)
(MADDW a (MOVDconst [c]) _) && int32(c)==0 => a
(MADDW a (MOVDconst [c]) x) && int32(c)==1 => (ADD a x)
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (ADDshiftLL a x [log2(c)])
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MADDW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (ADDshiftLL a x [log2(c)])
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (ADD a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MADDW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (SUB a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MADDW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MADDW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MADDW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (SUBshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MADDW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (ADDshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUB a x (MOVDconst [-1])) => (ADD a x)
(MSUB a _ (MOVDconst [0])) => a
(MSUB a x (MOVDconst [1])) => (SUB a x)
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUB a x (MOVDconst [c])) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUB a (MOVDconst [-1]) x) => (ADD a x)
(MSUB a (MOVDconst [0]) _) => a
(MSUB a (MOVDconst [1]) x) => (SUB a x)
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUB a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && c>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUB a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && c>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUB a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUB a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUB a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUB a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUBW a x (MOVDconst [c])) && int32(c)==-1 => (ADD a x)
(MSUBW a _ (MOVDconst [c])) && int32(c)==0 => a
(MSUBW a x (MOVDconst [c])) && int32(c)==1 => (SUB a x)
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUBW a x (MOVDconst [c])) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a x (MOVDconst [c])) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a x (MOVDconst [c])) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a x (MOVDconst [c])) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a x (MOVDconst [c])) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a x (MOVDconst [c])) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
(MSUBW a (MOVDconst [c]) x) && int32(c)==-1 => (ADD a x)
(MSUBW a (MOVDconst [c]) _) && int32(c)==0 => a
(MSUBW a (MOVDconst [c]) x) && int32(c)==1 => (SUB a x)
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c) => (SUBshiftLL a x [log2(c)])
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
-(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
-(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
-(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
-(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
-(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c) => (SUBshiftLL a x [log2(c)])
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c-1) && int32(c)>=3 => (SUB a (ADDshiftLL <x.Type> x x [log2(c-1)]))
+(MSUBW a (MOVDconst [c]) x) && isPowerOfTwo64(c+1) && int32(c)>=7 => (ADD a (SUBshiftLL <x.Type> x x [log2(c+1)]))
+(MSUBW a (MOVDconst [c]) x) && c%3 == 0 && isPowerOfTwo64(c/3) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [2]) [log2(c/3)])
+(MSUBW a (MOVDconst [c]) x) && c%5 == 0 && isPowerOfTwo64(c/5) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [2]) [log2(c/5)])
+(MSUBW a (MOVDconst [c]) x) && c%7 == 0 && isPowerOfTwo64(c/7) && is32Bit(c) => (ADDshiftLL a (SUBshiftLL <x.Type> x x [3]) [log2(c/7)])
+(MSUBW a (MOVDconst [c]) x) && c%9 == 0 && isPowerOfTwo64(c/9) && is32Bit(c) => (SUBshiftLL a (ADDshiftLL <x.Type> x x [3]) [log2(c/9)])
// div by constant
(UDIV x (MOVDconst [1])) => x
-(UDIV x (MOVDconst [c])) && isPowerOfTwo(c) => (SRLconst [log2(c)] x)
+(UDIV x (MOVDconst [c])) && isPowerOfTwo64(c) => (SRLconst [log2(c)] x)
(UDIVW x (MOVDconst [c])) && uint32(c)==1 => x
-(UDIVW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (SRLconst [log2(c)] x)
+(UDIVW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (SRLconst [log2(c)] x)
(UMOD _ (MOVDconst [1])) => (MOVDconst [0])
-(UMOD x (MOVDconst [c])) && isPowerOfTwo(c) => (ANDconst [c-1] x)
+(UMOD x (MOVDconst [c])) && isPowerOfTwo64(c) => (ANDconst [c-1] x)
(UMODW _ (MOVDconst [c])) && uint32(c)==1 => (MOVDconst [0])
-(UMODW x (MOVDconst [c])) && isPowerOfTwo(c) && is32Bit(c) => (ANDconst [c-1] x)
+(UMODW x (MOVDconst [c])) && isPowerOfTwo64(c) && is32Bit(c) => (ANDconst [c-1] x)
// generic simplifications
(ADD x (NEG y)) => (SUB x y)
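
[Note] All the isPowerOfTwo64 rewrites above reduce a multiply to shifts and adds; the underlying identities, as a sketch:

package main

import "fmt"

func main() {
	// Shift/add identities behind the MUL-by-constant rules above.
	x := int64(11)
	fmt.Println(x*8 == x<<3)    // isPowerOfTwo64(c):   (SLLconst [log2(c)] x)
	fmt.Println(x*9 == x+x<<3)  // isPowerOfTwo64(c-1): (ADDshiftLL x x [log2(c-1)])
	fmt.Println(x*7 == -x+x<<3) // isPowerOfTwo64(c+1): (ADDshiftLL (NEG x) x [log2(c+1)])
}
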
diff --git a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
index 9ff53f7e4e..fe9edbf933 100644
--- a/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/ARM64Ops.go
@@ -656,12 +656,14 @@ func init() {
// atomic and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns <new content of *arg0, memory>. auxint must be zero.
- // LDAXRB (Rarg0), Rout
+ // LDAXR (Rarg0), Rout
// AND/OR Rarg1, Rout
- // STLXRB Rout, (Rarg0), Rtmp
+ // STLXR Rout, (Rarg0), Rtmp
// CBNZ Rtmp, -3(PC)
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "AND", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt8,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpxchg, resultNotInArgs: true, asm: "ORR", typ: "(UInt32,Mem)", faultOnNilArg0: true, hasSideEffects: true, unsafePoint: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It saves all GP registers if necessary,
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS.rules b/src/cmd/compile/internal/ssa/gen/MIPS.rules
index 96feaf9234..b6e5312224 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS.rules
@@ -143,7 +143,7 @@
(Const(32|16|8) [val]) => (MOVWconst [int32(val)])
(Const(32|64)F ...) => (MOV(F|D)const ...)
(ConstNil) => (MOVWconst [0])
-(ConstBool [b]) => (MOVWconst [int32(b2i(b))])
+(ConstBool [b]) => (MOVWconst [b2i32(b)])
// truncations
// Because we ignore high parts of registers, truncates are just copies.
@@ -383,6 +383,9 @@
(ANDconst <typ.UInt32> [3]
(XORconst <typ.UInt32> [3] ptr)))))) mem)
+(AtomicAnd32 ...) => (LoweredAtomicAnd ...)
+(AtomicOr32 ...) => (LoweredAtomicOr ...)
+
// checks
(NilCheck ...) => (LoweredNilCheck ...)
@@ -581,13 +584,13 @@
(Select0 (MULTU (MOVWconst [1]) _ )) => (MOVWconst [0])
(Select1 (MULTU (MOVWconst [-1]) x )) => (NEG <x.Type> x)
(Select0 (MULTU (MOVWconst [-1]) x )) => (CMOVZ (ADDconst <x.Type> [-1] x) (MOVWconst [0]) x)
-(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
-(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
+(Select1 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(Select0 (MULTU (MOVWconst [c]) x )) && isPowerOfTwo64(int64(uint32(c))) => (SRLconst [int32(32-log2uint32(int64(c)))] x)
(MUL (MOVWconst [0]) _ ) => (MOVWconst [0])
(MUL (MOVWconst [1]) x ) => x
(MUL (MOVWconst [-1]) x ) => (NEG x)
-(MUL (MOVWconst [c]) x ) && isPowerOfTwo(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
+(MUL (MOVWconst [c]) x ) && isPowerOfTwo64(int64(uint32(c))) => (SLLconst [int32(log2uint32(int64(c)))] x)
// generic simplifications
(ADD x (NEG y)) => (SUB x y)
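
[Note] Per the rules above, Select1 of MULTU is the low word of the 32x32->64 product and Select0 the high word, so a power-of-two multiplier splits into the SLLconst/SRLconst pair. A sketch of that split:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// MULTU's 64-bit product: Select1 = low word, Select0 = high word.
	// With a power-of-two multiplier 1<<k the product is a pure shift.
	x, k := uint32(0xDEADBEEF), uint(4)
	hi, lo := bits.Mul32(x, 1<<k)
	fmt.Println(lo == x<<k, hi == x>>(32-k)) // true true
}
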
diff --git a/src/cmd/compile/internal/ssa/gen/MIPS64.rules b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
index e008ec8703..8e4c3a07c8 100644
--- a/src/cmd/compile/internal/ssa/gen/MIPS64.rules
+++ b/src/cmd/compile/internal/ssa/gen/MIPS64.rules
@@ -580,13 +580,13 @@
(Select1 (MULVU x (MOVVconst [-1]))) => (NEGV x)
(Select1 (MULVU _ (MOVVconst [0]))) => (MOVVconst [0])
(Select1 (MULVU x (MOVVconst [1]))) => x
-(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SLLVconst [log2(c)] x)
+(Select1 (MULVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SLLVconst [log2(c)] x)
// div by constant
(Select1 (DIVVU x (MOVVconst [1]))) => x
-(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (SRLVconst [log2(c)] x)
+(Select1 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (SRLVconst [log2(c)] x)
(Select0 (DIVVU _ (MOVVconst [1]))) => (MOVVconst [0]) // mod
-(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo(c) => (ANDconst [c-1] x) // mod
+(Select0 (DIVVU x (MOVVconst [c]))) && isPowerOfTwo64(c) => (ANDconst [c-1] x) // mod
// generic simplifications
(ADDV x (NEGV y)) => (SUBV x y)
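
[Note] The DIVVU rewrites rely on the usual unsigned power-of-two identities (quotient = logical shift, remainder = mask), sketched here:

package main

import "fmt"

func main() {
	// Unsigned divide/mod by 2^k reduce to shift and mask, matching
	// the SRLVconst/ANDconst rewrites above.
	x := uint64(1000003)
	fmt.Println(x/8 == x>>3, x%8 == x&7) // true true
}
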
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64.rules b/src/cmd/compile/internal/ssa/gen/PPC64.rules
index de30d003e6..558b09c9f2 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64.rules
+++ b/src/cmd/compile/internal/ssa/gen/PPC64.rules
@@ -150,6 +150,31 @@
(ROTLW x (MOVDconst [c])) => (ROTLWconst x [c&31])
(ROTL x (MOVDconst [c])) => (ROTLconst x [c&63])
+// Combine rotate and mask operations
+(ANDconst [m] (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(AND (MOVDconst [m]) (ROTLWconst [r] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,m,32)] x)
+(ANDconst [m] (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+(AND (MOVDconst [m]) (ROTLW x r)) && isPPC64WordRotateMask(m) => (RLWNM [encodePPC64RotateMask(0,m,32)] x r)
+
+// Note, any rotated word bitmask is still a valid word bitmask.
+(ROTLWconst [r] (AND (MOVDconst [m]) x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+(ROTLWconst [r] (ANDconst [m] x)) && isPPC64WordRotateMask(m) => (RLWINM [encodePPC64RotateMask(r,rotateLeft32(m,r),32)] x)
+
+(ANDconst [m] (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(ANDconst [m] (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64RShiftMask(m,s,32) == 0 => (MOVDconst [0])
+(AND (MOVDconst [m]) (SRWconst x [s])) && mergePPC64AndSrwi(m,s) != 0 => (RLWINM [mergePPC64AndSrwi(m,s)] x)
+
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (ANDconst [m] x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64RShiftMask(m>>uint(s),s,32) == 0 => (MOVDconst [0])
+(SRWconst (AND (MOVDconst [m]) x) [s]) && mergePPC64AndSrwi(m>>uint(s),s) != 0 => (RLWINM [mergePPC64AndSrwi(m>>uint(s),s)] x)
+
+// Merge shift right + shift left and clear left (e.g for a table lookup)
+(CLRLSLDI [c] (SRWconst [s] x)) && mergePPC64ClrlsldiSrw(int64(c),s) != 0 => (RLWINM [mergePPC64ClrlsldiSrw(int64(c),s)] x)
+(SLDconst [l] (SRWconst [r] x)) && mergePPC64SldiSrw(l,r) != 0 => (RLWINM [mergePPC64SldiSrw(l,r)] x)
+// The following reduction shows up frequently too. e.g b[(x>>14)&0xFF]
+(CLRLSLDI [c] i:(RLWINM [s] x)) && mergePPC64ClrlsldiRlwinm(c,s) != 0 => (RLWINM [mergePPC64ClrlsldiRlwinm(c,s)] x)
// large constant shifts
(Lsh64x64 _ (MOVDconst [c])) && uint64(c) >= 64 => (MOVDconst [0])
@@ -821,6 +846,8 @@
(ADDconst [c] (MOVDaddr [d] {sym} x)) && is32Bit(c+int64(d)) => (MOVDaddr [int32(c+int64(d))] {sym} x)
+(MULL(W|D) x (MOVDconst [c])) && is16Bit(c) => (MULL(W|D)const [int32(c)] x)
+
// Subtract from (with carry, but ignored) constant.
// Note, these clobber the carry bit.
(SUB (MOVDconst [c]) x) && is32Bit(c) => (SUBFCconst [c] x)
@@ -965,10 +992,10 @@
// atomic intrinsics
(AtomicLoad(8|32|64|Ptr) ptr mem) => (LoweredAtomicLoad(8|32|64|Ptr) [1] ptr mem)
-(AtomicLoadAcq32 ptr mem) => (LoweredAtomicLoad32 [0] ptr mem)
+(AtomicLoadAcq(32|64) ptr mem) => (LoweredAtomicLoad(32|64) [0] ptr mem)
(AtomicStore(8|32|64) ptr val mem) => (LoweredAtomicStore(8|32|64) [1] ptr val mem)
-(AtomicStoreRel32 ptr val mem) => (LoweredAtomicStore32 [0] ptr val mem)
+(AtomicStoreRel(32|64) ptr val mem) => (LoweredAtomicStore(32|64) [0] ptr val mem)
//(AtomicStorePtrNoWB ptr val mem) => (STLR ptr val mem)
(AtomicExchange(32|64) ...) => (LoweredAtomicExchange(32|64) ...)
@@ -978,8 +1005,10 @@
(AtomicCompareAndSwap(32|64) ptr old new_ mem) => (LoweredAtomicCas(32|64) [1] ptr old new_ mem)
(AtomicCompareAndSwapRel32 ptr old new_ mem) => (LoweredAtomicCas32 [0] ptr old new_ mem)
-(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
-(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicAnd8 ...) => (LoweredAtomicAnd8 ...)
+(AtomicAnd32 ...) => (LoweredAtomicAnd32 ...)
+(AtomicOr8 ...) => (LoweredAtomicOr8 ...)
+(AtomicOr32 ...) => (LoweredAtomicOr32 ...)
(Slicemask <t> x) => (SRADconst (NEG <t> x) [63])
@@ -1018,13 +1047,12 @@
(SLDconst [c] z:(MOVHZreg x)) && c < 16 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,48,63,64)] x)
(SLDconst [c] z:(MOVWZreg x)) && c < 32 && z.Uses == 1 => (CLRLSLDI [newPPC64ShiftAuxInt(c,32,63,64)] x)
-(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
-(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+(SLDconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c <= (64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
+(SLDconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(64-getPPC64ShiftMaskLength(d)) => (CLRLSLDI [newPPC64ShiftAuxInt(c,64-getPPC64ShiftMaskLength(d),63,64)] x)
(SLWconst [c] z:(MOVBZreg x)) && z.Uses == 1 && c < 8 => (CLRLSLWI [newPPC64ShiftAuxInt(c,24,31,32)] x)
(SLWconst [c] z:(MOVHZreg x)) && z.Uses == 1 && c < 16 => (CLRLSLWI [newPPC64ShiftAuxInt(c,16,31,32)] x)
-(SLWconst [c] z:(MOVWZreg x)) && z.Uses == 1 && c < 24 => (CLRLSLWI [newPPC64ShiftAuxInt(c,8,31,32)] x)
-(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
-(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+(SLWconst [c] z:(ANDconst [d] x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
+(SLWconst [c] z:(AND (MOVDconst [d]) x)) && z.Uses == 1 && isPPC64ValidShiftMask(d) && c<=(32-getPPC64ShiftMaskLength(d)) => (CLRLSLWI [newPPC64ShiftAuxInt(c,32-getPPC64ShiftMaskLength(d),31,32)] x)
// special case for power9
(SL(W|D)const [c] z:(MOVWreg x)) && c < 32 && objabi.GOPPC64 >= 9 => (EXTSWSLconst [c] x)
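
[Note] The new rotate-and-mask combines (RLWINM/RLWNM) lean on the identity noted above -- a rotated word bitmask is still a word bitmask -- because AND commutes with rotation once the mask is rotated too. A sketch:

package main

import (
	"fmt"
	"math/bits"
)

func main() {
	// rotl(x&m, r) == rotl(x, r) & rotl(m, r): masking commutes with
	// rotation once the mask is rotated too, so ROTLWconst of an AND
	// folds into one RLWINM with a rotated mask.
	x, m := uint32(0x12345678), uint32(0x00FF00FF)
	r := 8
	fmt.Println(bits.RotateLeft32(x&m, r) == bits.RotateLeft32(x, r)&bits.RotateLeft32(m, r)) // true
}
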
diff --git a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
index 28317928a8..f7198b90c3 100644
--- a/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/PPC64Ops.go
@@ -137,6 +137,7 @@ func init() {
gp01 = regInfo{inputs: nil, outputs: []regMask{gp}}
gp11 = regInfo{inputs: []regMask{gp | sp | sb}, outputs: []regMask{gp}}
gp21 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
+ gp21a0 = regInfo{inputs: []regMask{gp, gp | sp | sb}, outputs: []regMask{gp}}
gp31 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp}}
gp22 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
gp32 = regInfo{inputs: []regMask{gp | sp | sb, gp | sp | sb, gp | sp | sb}, outputs: []regMask{gp, gp}}
@@ -181,6 +182,8 @@ func init() {
{name: "MULLD", argLength: 2, reg: gp21, asm: "MULLD", typ: "Int64", commutative: true}, // arg0*arg1 (signed 64-bit)
{name: "MULLW", argLength: 2, reg: gp21, asm: "MULLW", typ: "Int32", commutative: true}, // arg0*arg1 (signed 32-bit)
+ {name: "MULLDconst", argLength: 1, reg: gp11, asm: "MULLD", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
+ {name: "MULLWconst", argLength: 1, reg: gp11, asm: "MULLW", aux: "Int32", typ: "Int64"}, // arg0*auxInt (signed 64-bit)
{name: "MADDLD", argLength: 3, reg: gp31, asm: "MADDLD", typ: "Int64"}, // (arg0*arg1)+arg2 (signed 64-bit)
{name: "MULHD", argLength: 2, reg: gp21, asm: "MULHD", commutative: true}, // (arg0 * arg1) >> 64, signed
@@ -225,6 +228,10 @@ func init() {
{name: "ROTLWconst", argLength: 1, reg: gp11, asm: "ROTLW", aux: "Int64"}, // uint32(arg0) rotate left by auxInt bits
{name: "EXTSWSLconst", argLength: 1, reg: gp11, asm: "EXTSWSLI", aux: "Int64"},
+ {name: "RLWINM", argLength: 1, reg: gp11, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by immediate "rlwinm". encodePPC64RotateMask describes aux
+ {name: "RLWNM", argLength: 2, reg: gp21, asm: "RLWNM", aux: "Int64"}, // Rotate and mask by "rlwnm". encodePPC64RotateMask describes aux
+ {name: "RLWMI", argLength: 2, reg: gp21a0, asm: "RLWMI", aux: "Int64", resultInArg0: true}, // "rlwimi" similar aux encoding as above
+
{name: "CNTLZD", argLength: 1, reg: gp11, asm: "CNTLZD", clobberFlags: true}, // count leading zeros
{name: "CNTLZW", argLength: 1, reg: gp11, asm: "CNTLZW", clobberFlags: true}, // count leading zeros (32 bit)
@@ -600,25 +607,22 @@ func init() {
{name: "LoweredAtomicLoadPtr", argLength: 2, reg: gpload, typ: "Int64", aux: "Int64", clobberFlags: true, faultOnNilArg0: true},
// atomic add32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// ADD Rarg1, Rout
// STDCCC Rout, (Rarg0)
// BNE -3(PC)
- // ISYNC
// return new sum
-
{name: "LoweredAtomicAdd32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicAdd64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
// atomic exchange32, 64
- // SYNC
+ // LWSYNC
// LDAR (Rarg0), Rout
// STDCCC Rarg1, (Rarg0)
// BNE -2(PC)
// ISYNC
// return old val
-
{name: "LoweredAtomicExchange32", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicExchange64", argLength: 3, reg: gpxchg, resultNotInArgs: true, clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
@@ -641,15 +645,16 @@ func init() {
{name: "LoweredAtomicCas64", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicCas32", argLength: 4, reg: gpcas, resultNotInArgs: true, aux: "Int64", clobberFlags: true, faultOnNilArg0: true, hasSideEffects: true},
- // atomic 8 and/or.
+ // atomic 8/32 and/or.
// *arg0 &= (|=) arg1. arg2=mem. returns memory. auxint must be zero.
- // LBAR (Rarg0), Rtmp
+ // LBAR/LWAT (Rarg0), Rtmp
// AND/OR Rarg1, Rtmp
- // STBCCC Rtmp, (Rarg0), Rtmp
+ // STBCCC/STWCCC Rtmp, (Rarg0), Rtmp
// BNE Rtmp, -3(PC)
-
{name: "LoweredAtomicAnd8", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicAnd32", argLength: 3, reg: gpstore, asm: "AND", faultOnNilArg0: true, hasSideEffects: true},
{name: "LoweredAtomicOr8", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
+ {name: "LoweredAtomicOr32", argLength: 3, reg: gpstore, asm: "OR", faultOnNilArg0: true, hasSideEffects: true},
// LoweredWB invokes runtime.gcWriteBarrier. arg0=destptr, arg1=srcptr, arg2=mem, aux=runtime.gcWriteBarrier
// It preserves R0 through R17 (except special registers R1, R2, R11, R12, R13), g, and its arguments R20 and R21,
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64.rules b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
index 9437c8e9d4..325cbeb825 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64.rules
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64.rules
@@ -3,11 +3,7 @@
// license that can be found in the LICENSE file.
// Optimizations TODO:
-// * Somehow track when values are already zero/signed-extended, avoid re-extending.
// * Use SLTI and SLTIU for comparisons to constants, instead of SLT/SLTU with constants in registers
-// * Find a more efficient way to do zero/sign extension than left+right shift.
-// There are many other options (store then load-extend, LUI+ANDI for zero extend, special case 32->64, ...),
-// but left+right shift is simple and uniform, and we don't have real hardware to do perf testing on anyway.
// * Use the zero register instead of moving 0 into a register.
// * Add rules to avoid generating a temp bool value for (If (SLT[U] ...) ...).
// * Optimize left and right shift by simplifying SLTIU, Neg, and ADD for constants.
@@ -66,8 +62,8 @@
(Mod32u ...) => (REMUW ...)
(Mod16 x y [false]) => (REMW (SignExt16to32 x) (SignExt16to32 y))
(Mod16u x y) => (REMUW (ZeroExt16to32 x) (ZeroExt16to32 y))
-(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
-(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
+(Mod8 x y) => (REMW (SignExt8to32 x) (SignExt8to32 y))
+(Mod8u x y) => (REMUW (ZeroExt8to32 x) (ZeroExt8to32 y))
(And64 ...) => (AND ...)
(And32 ...) => (AND ...)
@@ -98,25 +94,21 @@
(Sqrt ...) => (FSQRTD ...)
-// Zero and sign extension
-// Shift left until the bits we want are at the top of the register.
-// Then logical/arithmetic shift right for zero/sign extend.
-// We always extend to 64 bits; there's no reason not to,
-// and optimization rules can then collapse some extensions.
-
-(SignExt8to16 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt8to32 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt8to64 <t> x) => (SRAI [56] (SLLI <t> [56] x))
-(SignExt16to32 <t> x) => (SRAI [48] (SLLI <t> [48] x))
-(SignExt16to64 <t> x) => (SRAI [48] (SLLI <t> [48] x))
-(SignExt32to64 <t> x) => (ADDIW [0] x)
-
-(ZeroExt8to16 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt8to32 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt8to64 <t> x) => (SRLI [56] (SLLI <t> [56] x))
-(ZeroExt16to32 <t> x) => (SRLI [48] (SLLI <t> [48] x))
-(ZeroExt16to64 <t> x) => (SRLI [48] (SLLI <t> [48] x))
-(ZeroExt32to64 <t> x) => (SRLI [32] (SLLI <t> [32] x))
+// Sign and zero extension.
+
+(SignExt8to16 ...) => (MOVBreg ...)
+(SignExt8to32 ...) => (MOVBreg ...)
+(SignExt8to64 ...) => (MOVBreg ...)
+(SignExt16to32 ...) => (MOVHreg ...)
+(SignExt16to64 ...) => (MOVHreg ...)
+(SignExt32to64 ...) => (MOVWreg ...)
+
+(ZeroExt8to16 ...) => (MOVBUreg ...)
+(ZeroExt8to32 ...) => (MOVBUreg ...)
+(ZeroExt8to64 ...) => (MOVBUreg ...)
+(ZeroExt16to32 ...) => (MOVHUreg ...)
+(ZeroExt16to64 ...) => (MOVHUreg ...)
+(ZeroExt32to64 ...) => (MOVWUreg ...)
(Cvt32to32F ...) => (FCVTSW ...)
(Cvt32to64F ...) => (FCVTDW ...)
@@ -261,16 +253,16 @@
(EqPtr x y) => (SEQZ (SUB <x.Type> x y))
(Eq64 x y) => (SEQZ (SUB <x.Type> x y))
(Eq32 x y) => (SEQZ (SUBW <x.Type> x y))
-(Eq16 x y) => (SEQZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Eq8 x y) => (SEQZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Eq16 x y) => (SEQZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Eq8 x y) => (SEQZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Eq64F ...) => (FEQD ...)
(Eq32F ...) => (FEQS ...)
(NeqPtr x y) => (SNEZ (SUB <x.Type> x y))
(Neq64 x y) => (SNEZ (SUB <x.Type> x y))
(Neq32 x y) => (SNEZ (SUBW <x.Type> x y))
-(Neq16 x y) => (SNEZ (ZeroExt16to64 (SUB <x.Type> x y)))
-(Neq8 x y) => (SNEZ (ZeroExt8to64 (SUB <x.Type> x y)))
+(Neq16 x y) => (SNEZ (SUB <x.Type> (ZeroExt16to64 x) (ZeroExt16to64 y)))
+(Neq8 x y) => (SNEZ (SUB <x.Type> (ZeroExt8to64 x) (ZeroExt8to64 y)))
(Neq64F ...) => (FNED ...)
(Neq32F ...) => (FNES ...)
@@ -291,8 +283,8 @@
(Store {t} ptr val mem) && t.Size() == 2 => (MOVHstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 4 && !is32BitFloat(val.Type) => (MOVWstore ptr val mem)
(Store {t} ptr val mem) && t.Size() == 8 && !is64BitFloat(val.Type) => (MOVDstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
-(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 4 && is32BitFloat(val.Type) => (FMOVWstore ptr val mem)
+(Store {t} ptr val mem) && t.Size() == 8 && is64BitFloat(val.Type) => (FMOVDstore ptr val mem)
// We need to fold MOVaddr into the LD/MOVDstore ops so that the live variable analysis
// knows what variables are being read/written by the ops.
@@ -368,6 +360,13 @@
(Zero [4] ptr mem) => (MOVWstore ptr (MOVWconst [0]) mem)
(Zero [8] ptr mem) => (MOVDstore ptr (MOVDconst [0]) mem)
+// Medium zeroing uses a Duff's device
+// 8 and 128 are magic constants; see runtime/mkduff.go.
+(Zero [s] {t} ptr mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128
+ && t.Alignment()%8 == 0 && !config.noDuffDevice =>
+ (DUFFZERO [8 * (128 - s/8)] ptr mem)
+
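
The auxint is the offset into the duffzero routine at which execution starts. Assuming each 8-byte element of the routine occupies 8 bytes of code (two 4-byte instructions, a store and a pointer increment, per runtime/mkduff.go), the arithmetic is:

    // duffZeroOffset computes the DUFFZERO auxint for zeroing s bytes
    // (s a multiple of 8, 16 <= s <= 8*128). Illustrative helper, not in the source.
    func duffZeroOffset(s int64) int64 {
        return 8 * (128 - s/8) // skip the elements that are not needed
    }

For example, zeroing 64 bytes (8 elements) enters at 8*(128-8) = 960, leaving exactly 8 elements to run.
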
// Generic zeroing uses a loop
(Zero [s] {t} ptr mem) =>
(LoweredZero [t.Alignment()]
@@ -403,6 +402,13 @@
(Move [4] dst src mem) => (MOVWstore dst (MOVWload src mem) mem)
(Move [8] dst src mem) => (MOVDstore dst (MOVDload src mem) mem)
+// Medium move uses a Duff's device
+// 16 and 128 are magic constants; see runtime/mkduff.go.
+(Move [s] {t} dst src mem)
+ && s%8 == 0 && s >= 16 && s <= 8*128 && t.Alignment()%8 == 0
+ && !config.noDuffDevice && logLargeCopy(v, s) =>
+ (DUFFCOPY [16 * (128 - s/8)] dst src mem)
+
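
The copy case is analogous, except that each 8-byte element of duffcopy presumably costs 16 bytes of code (load, store, and two pointer increments), hence the factor of 16:

    // duffCopyOffset computes the DUFFCOPY auxint for moving s bytes.
    // Illustrative helper, mirroring the rule above.
    func duffCopyOffset(s int64) int64 {
        return 16 * (128 - s/8)
    }
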
// Generic move uses a loop
(Move [s] {t} dst src mem) && (s <= 16 || logLargeCopy(v, s)) =>
(LoweredMove [t.Alignment()]
@@ -501,6 +507,65 @@
(MOVWstore [off] {sym} ptr (MOVWconst [0]) mem) => (MOVWstorezero [off] {sym} ptr mem)
(MOVDstore [off] {sym} ptr (MOVDconst [0]) mem) => (MOVDstorezero [off] {sym} ptr mem)
+// Avoid sign/zero extension after properly typed load.
+(MOVBreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWreg x:(MOVWload _ _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUload _ _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUload _ _)) => (MOVDreg x)
+
+// Fold double extensions.
+(MOVBreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVHreg _)) => (MOVDreg x)
+(MOVWreg x:(MOVWreg _)) => (MOVDreg x)
+(MOVBUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVHUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVBUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVHUreg _)) => (MOVDreg x)
+(MOVWUreg x:(MOVWUreg _)) => (MOVDreg x)
+
+// Do not extend before store.
+(MOVBstore [off] {sym} ptr (MOVBreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVBUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVBstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVBstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVHUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVHstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVHstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+(MOVWstore [off] {sym} ptr (MOVWUreg x) mem) => (MOVWstore [off] {sym} ptr x mem)
+
+// Replace extend after load with alternate load where possible.
+(MOVBreg <t> x:(MOVBUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBload <t> [off] {sym} ptr mem)
+(MOVHreg <t> x:(MOVHUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHload <t> [off] {sym} ptr mem)
+(MOVWreg <t> x:(MOVWUload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWload <t> [off] {sym} ptr mem)
+(MOVBUreg <t> x:(MOVBload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVBUload <t> [off] {sym} ptr mem)
+(MOVHUreg <t> x:(MOVHload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVHUload <t> [off] {sym} ptr mem)
+(MOVWUreg <t> x:(MOVWload [off] {sym} ptr mem)) && x.Uses == 1 && clobber(x) => @x.Block (MOVWUload <t> [off] {sym} ptr mem)
+
+// If a register move has only 1 use, just use the same register without emitting an instruction.
+// MOVDnop does not emit an instruction; it exists only to ensure the result type.
+(MOVDreg x) && x.Uses == 1 => (MOVDnop x)
+
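
Taken together, these rules let a correctly typed load absorb its extension entirely. A small Go example where the chain is expected to collapse (the ops in the comments are inferred from the rules above, not compiler output):

    func loadByte(p *uint8) uint64 {
        b := *p          // MOVBUload: zero-extending byte load
        return uint64(b) // MOVBUreg -> MOVDreg -> MOVDnop: no instruction emitted
    }
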
// Fold constant into immediate instructions where possible.
(ADD (MOVBconst [val]) x) => (ADDI [int64(val)] x)
(ADD (MOVHconst [val]) x) => (ADDI [int64(val)] x)
diff --git a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
index b06b86075e..f64319230b 100644
--- a/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
+++ b/src/cmd/compile/internal/ssa/gen/RISCV64Ops.go
@@ -24,10 +24,11 @@ import (
// L = 64 bit int, used when the opcode starts with F
const (
- riscv64REG_G = 4
+ riscv64REG_G = 27
riscv64REG_CTXT = 20
riscv64REG_LR = 1
riscv64REG_SP = 2
+ riscv64REG_TP = 4
riscv64REG_TMP = 31
riscv64REG_ZERO = 0
)
@@ -78,8 +79,8 @@ func init() {
// Add general purpose registers to gpMask.
switch r {
- // ZERO, and TMP are not in any gp mask.
- case riscv64REG_ZERO, riscv64REG_TMP:
+ // ZERO, TP and TMP are not in any gp mask.
+ case riscv64REG_ZERO, riscv64REG_TP, riscv64REG_TMP:
case riscv64REG_G:
gpgMask |= mask
gpspsbgMask |= mask
@@ -192,6 +193,17 @@ func init() {
{name: "MOVWstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOVW", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 32 bits
{name: "MOVDstorezero", argLength: 2, reg: gpstore0, aux: "SymOff", asm: "MOV", typ: "Mem", faultOnNilArg0: true, symEffect: "Write"}, // 64 bits
+ // Conversions
+ {name: "MOVBreg", argLength: 1, reg: gp11, asm: "MOVB"}, // move from arg0, sign-extended from byte
+ {name: "MOVHreg", argLength: 1, reg: gp11, asm: "MOVH"}, // move from arg0, sign-extended from half
+ {name: "MOVWreg", argLength: 1, reg: gp11, asm: "MOVW"}, // move from arg0, sign-extended from word
+ {name: "MOVDreg", argLength: 1, reg: gp11, asm: "MOV"}, // move from arg0
+ {name: "MOVBUreg", argLength: 1, reg: gp11, asm: "MOVBU"}, // move from arg0, zero-extended from byte
+ {name: "MOVHUreg", argLength: 1, reg: gp11, asm: "MOVHU"}, // move from arg0, zero-extended from half
+ {name: "MOVWUreg", argLength: 1, reg: gp11, asm: "MOVWU"}, // move from arg0, zero-extended from word
+
+ {name: "MOVDnop", argLength: 1, reg: regInfo{inputs: []regMask{gpMask}, outputs: []regMask{gpMask}}, resultInArg0: true}, // nop, return arg0 in same register
+
// Shift ops
{name: "SLL", argLength: 2, reg: gp21, asm: "SLL"}, // arg0 << (arg1 & 63)
{name: "SRA", argLength: 2, reg: gp21, asm: "SRA"}, // arg0 >> (arg1 & 63), signed
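
As the comments note, RISC-V register shifts use only the low six bits of the count. In Go terms (illustrative only):

    func sll(x, y uint64) uint64      { return x << (y & 63) } // SLL
    func sra(x int64, y uint64) int64 { return x >> (y & 63) } // SRA, arithmetic
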
@@ -228,6 +240,44 @@ func init() {
{name: "CALLclosure", argLength: 3, reg: callClosure, aux: "CallOff", call: true}, // call function via closure. arg0=codeptr, arg1=closure, arg2=mem, auxint=argsize, returns mem
{name: "CALLinter", argLength: 2, reg: callInter, aux: "CallOff", call: true}, // call fn by pointer. arg0=codeptr, arg1=mem, auxint=argsize, returns mem
+ // duffzero
+ // arg0 = address of memory to zero (in X10, changed as side effect)
+ // arg1 = mem
+ // auxint = offset into duffzero code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFZERO",
+ aux: "Int64",
+ argLength: 2,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ },
+
+ // duffcopy
+ // arg0 = address of dst memory (in X11, changed as side effect)
+ // arg1 = address of src memory (in X10, changed as side effect)
+ // arg2 = mem
+ // auxint = offset into duffcopy code to start executing
+ // X1 (link register) changed because of function call
+ // returns mem
+ {
+ name: "DUFFCOPY",
+ aux: "Int64",
+ argLength: 3,
+ reg: regInfo{
+ inputs: []regMask{regNamed["X11"], regNamed["X10"]},
+ clobbers: regNamed["X1"] | regNamed["X10"] | regNamed["X11"],
+ },
+ typ: "Mem",
+ faultOnNilArg0: true,
+ faultOnNilArg1: true,
+ },
+
// Generic moves and zeros
// general unaligned zeroing
diff --git a/src/cmd/compile/internal/ssa/gen/S390X.rules b/src/cmd/compile/internal/ssa/gen/S390X.rules
index e564f638d3..2d6f091a4e 100644
--- a/src/cmd/compile/internal/ssa/gen/S390X.rules
+++ b/src/cmd/compile/internal/ssa/gen/S390X.rules
@@ -198,6 +198,9 @@
(RXSBG <typ.UInt32> {s390x.NewRotateParams(59, 60, 3)} (MOVDconst [3<<3]) ptr))
mem)
+(AtomicAnd32 ...) => (LAN ...)
+(AtomicOr32 ...) => (LAO ...)
+
// Lowering extension
// Note: we always extend to 64 bits even though some ops don't need that many result bits.
(SignExt8to(16|32|64) ...) => (MOVBreg ...)
diff --git a/src/cmd/compile/internal/ssa/gen/S390XOps.go b/src/cmd/compile/internal/ssa/gen/S390XOps.go
index 417b33cf91..728cfb5508 100644
--- a/src/cmd/compile/internal/ssa/gen/S390XOps.go
+++ b/src/cmd/compile/internal/ssa/gen/S390XOps.go
@@ -547,8 +547,10 @@ func init() {
// Atomic bitwise operations.
// Note: 'floor' operations round the pointer down to the nearest word boundary
// which reflects how they are used in the runtime.
- {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
+ {name: "LAN", argLength: 3, reg: gpstore, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 &= arg1. arg2 = mem.
{name: "LANfloor", argLength: 3, reg: gpstorelab, asm: "LAN", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) &= arg1. arg2 = mem.
+ {name: "LAO", argLength: 3, reg: gpstore, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *arg0 |= arg1. arg2 = mem.
+ {name: "LAOfloor", argLength: 3, reg: gpstorelab, asm: "LAO", typ: "Mem", clobberFlags: true, hasSideEffects: true}, // *(floor(arg0, 4)) |= arg1. arg2 = mem.
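
LAN and LAO perform the read-modify-write as a single interlocked instruction. For reference, a sketch of the semantics they implement, written as the compare-and-swap loop a target without such instructions might use (sync/atomic is used purely for illustration):

    package sketch

    import "sync/atomic"

    // atomicAnd32 implements *p &= v atomically; on s390x the compiler
    // now emits a single LAN instead of a loop like this.
    func atomicAnd32(p *uint32, v uint32) {
        for {
            old := atomic.LoadUint32(p)
            if atomic.CompareAndSwapUint32(p, old, old&v) {
                return
            }
        }
    }
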
// Compare and swap.
// arg0 = pointer, arg1 = old value, arg2 = new value, arg3 = memory.
diff --git a/src/cmd/compile/internal/ssa/gen/dec64.rules b/src/cmd/compile/internal/ssa/gen/dec64.rules
index 4f9e863f90..07607960fa 100644
--- a/src/cmd/compile/internal/ssa/gen/dec64.rules
+++ b/src/cmd/compile/internal/ssa/gen/dec64.rules
@@ -9,7 +9,6 @@
(Int64Hi (Int64Make hi _)) => hi
(Int64Lo (Int64Make _ lo)) => lo
-
(Load <t> ptr mem) && is64BitInt(t) && !config.BigEndian && t.IsSigned() =>
(Int64Make
(Load <typ.Int32> (OffPtr <typ.Int32Ptr> [4] ptr) mem)
@@ -143,6 +142,10 @@
(Trunc64to32 (Int64Make _ lo)) => lo
(Trunc64to16 (Int64Make _ lo)) => (Trunc32to16 lo)
(Trunc64to8 (Int64Make _ lo)) => (Trunc32to8 lo)
+// Most general
+(Trunc64to32 x) => (Int64Lo x)
+(Trunc64to16 x) => (Trunc32to16 (Int64Lo x))
+(Trunc64to8 x) => (Trunc32to8 (Int64Lo x))
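
On a 32-bit target a 64-bit value is a (hi, lo) pair of words, so truncation is free: keep the low word and, if needed, narrow it. In Go terms (illustrative):

    func trunc64to32(hi, lo uint32) uint32 { return lo }         // (Int64Lo x)
    func trunc64to16(hi, lo uint32) uint16 { return uint16(lo) } // (Trunc32to16 (Int64Lo x))
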
(Lsh32x64 _ (Int64Make (Const32 [c]) _)) && c != 0 => (Const32 [0])
(Rsh32x64 x (Int64Make (Const32 [c]) _)) && c != 0 => (Signmask x)
@@ -175,156 +178,174 @@
// turn x64 non-constant shifts to x32 shifts
// if high 32-bit of the shift is nonzero, make a huge shift
(Lsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh64x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh64Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Lsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh32x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh32Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Lsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh16x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh16Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Lsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh8x64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
(Rsh8Ux64 x (Int64Make hi lo)) && hi.Op != OpConst32 =>
- (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+ (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask hi) lo))
+
+// Most general
+(Lsh64x64 x y) => (Lsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64x64 x y) => (Rsh64x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh64Ux64 x y) => (Rsh64Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh32x64 x y) => (Lsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32x64 x y) => (Rsh32x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh32Ux64 x y) => (Rsh32Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh16x64 x y) => (Lsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16x64 x y) => (Rsh16x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh16Ux64 x y) => (Rsh16Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Lsh8x64 x y) => (Lsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8x64 x y) => (Rsh8x32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+(Rsh8Ux64 x y) => (Rsh8Ux32 x (Or32 <typ.UInt32> (Zeromask (Int64Hi y)) (Int64Lo y)))
+
+// Clean up constants a little
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c == 0 => y
+(Or32 <typ.UInt32> (Zeromask (Const32 [c])) y) && c != 0 => (Const32 <typ.UInt32> [-1])
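
The count reduction works because a 64-bit shift count with a nonzero high word is at least 2^32, far past any useful amount. Zeromask turns the high word into 0 or all ones, and ORing that into the low word yields an oversized 32-bit count exactly when the 64-bit count was oversized. A sketch:

    // shiftCount reduces a 64-bit shift count (hi, lo) to 32 bits while
    // preserving whether the count exceeds the operand width. Illustrative.
    func shiftCount(hi, lo uint32) uint32 {
        var mask uint32
        if hi != 0 {
            mask = 0xFFFFFFFF // Zeromask(hi)
        }
        return mask | lo
    }
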
// 64x left shift
// result.hi = hi<<s | lo>>(32-s) | lo<<(s-32) // >> is unsigned, large shifts result in 0
// result.lo = lo<<s
-(Lsh64x32 (Int64Make hi lo) s) =>
+(Lsh64x32 x s) =>
(Int64Make
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Lsh32x32 <typ.UInt32> hi s)
+ (Lsh32x32 <typ.UInt32> (Int64Hi x) s)
(Rsh32Ux32 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
(Lsh32x32 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32]))))
- (Lsh32x32 <typ.UInt32> lo s))
-(Lsh64x16 (Int64Make hi lo) s) =>
+ (Lsh32x32 <typ.UInt32> (Int64Lo x) s))
+(Lsh64x16 x s) =>
(Int64Make
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Lsh32x16 <typ.UInt32> hi s)
+ (Lsh32x16 <typ.UInt32> (Int64Hi x) s)
(Rsh32Ux16 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
(Lsh32x16 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32]))))
- (Lsh32x16 <typ.UInt32> lo s))
-(Lsh64x8 (Int64Make hi lo) s) =>
+ (Lsh32x16 <typ.UInt32> (Int64Lo x) s))
+(Lsh64x8 x s) =>
(Int64Make
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Lsh32x8 <typ.UInt32> hi s)
+ (Lsh32x8 <typ.UInt32> (Int64Hi x) s)
(Rsh32Ux8 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
(Lsh32x8 <typ.UInt32>
- lo
+ (Int64Lo x)
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32]))))
- (Lsh32x8 <typ.UInt32> lo s))
+ (Lsh32x8 <typ.UInt32> (Int64Lo x) s))
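
Written out in Go, the left-shift expansion computes the following (a sketch; Go shifts of a uint32 by 32 or more yield 0, matching the rule comment, and the wraparound of 32-s and s-32 selects which term contributes):

    func lsh64(hi, lo, s uint32) (rhi, rlo uint32) {
        rhi = hi<<s | lo>>(32-s) | lo<<(s-32)
        rlo = lo << s
        return
    }

For s in [1,31] the middle term carries lo's high bits into the high word; for s >= 32 the high word is just lo shifted left by s-32.
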
// 64x unsigned right shift
// result.hi = hi>>s
// result.lo = lo>>s | hi<<(32-s) | hi>>(s-32) // >> is unsigned, large shifts result in 0
-(Rsh64Ux32 (Int64Make hi lo) s) =>
+(Rsh64Ux32 x s) =>
(Int64Make
- (Rsh32Ux32 <typ.UInt32> hi s)
+ (Rsh32Ux32 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux32 <typ.UInt32> lo s)
+ (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
(Lsh32x32 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
(Rsh32Ux32 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))))
-(Rsh64Ux16 (Int64Make hi lo) s) =>
+(Rsh64Ux16 x s) =>
(Int64Make
- (Rsh32Ux16 <typ.UInt32> hi s)
+ (Rsh32Ux16 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux16 <typ.UInt32> lo s)
+ (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
(Lsh32x16 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
(Rsh32Ux16 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))))
-(Rsh64Ux8 (Int64Make hi lo) s) =>
+(Rsh64Ux8 x s) =>
(Int64Make
- (Rsh32Ux8 <typ.UInt32> hi s)
+ (Rsh32Ux8 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux8 <typ.UInt32> lo s)
+ (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
(Lsh32x8 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
(Rsh32Ux8 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))))
// 64x signed right shift
// result.hi = hi>>s
// result.lo = lo>>s | hi<<(32-s) | (hi>>(s-32))&zeromask(s>>5) // hi>>(s-32) is signed, large shifts result in 0/-1
-(Rsh64x32 (Int64Make hi lo) s) =>
+(Rsh64x32 x s) =>
(Int64Make
- (Rsh32x32 <typ.UInt32> hi s)
+ (Rsh32x32 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux32 <typ.UInt32> lo s)
+ (Rsh32Ux32 <typ.UInt32> (Int64Lo x) s)
(Lsh32x32 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub32 <typ.UInt32> (Const32 <typ.UInt32> [32]) s)))
(And32 <typ.UInt32>
(Rsh32x32 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub32 <typ.UInt32> s (Const32 <typ.UInt32> [32])))
(Zeromask
(Rsh32Ux32 <typ.UInt32> s (Const32 <typ.UInt32> [5]))))))
-(Rsh64x16 (Int64Make hi lo) s) =>
+(Rsh64x16 x s) =>
(Int64Make
- (Rsh32x16 <typ.UInt32> hi s)
+ (Rsh32x16 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux16 <typ.UInt32> lo s)
+ (Rsh32Ux16 <typ.UInt32> (Int64Lo x) s)
(Lsh32x16 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub16 <typ.UInt16> (Const16 <typ.UInt16> [32]) s)))
(And32 <typ.UInt32>
(Rsh32x16 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub16 <typ.UInt16> s (Const16 <typ.UInt16> [32])))
(Zeromask
(ZeroExt16to32
(Rsh16Ux32 <typ.UInt16> s (Const32 <typ.UInt32> [5])))))))
-(Rsh64x8 (Int64Make hi lo) s) =>
+(Rsh64x8 x s) =>
(Int64Make
- (Rsh32x8 <typ.UInt32> hi s)
+ (Rsh32x8 <typ.UInt32> (Int64Hi x) s)
(Or32 <typ.UInt32>
(Or32 <typ.UInt32>
- (Rsh32Ux8 <typ.UInt32> lo s)
+ (Rsh32Ux8 <typ.UInt32> (Int64Lo x) s)
(Lsh32x8 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub8 <typ.UInt8> (Const8 <typ.UInt8> [32]) s)))
(And32 <typ.UInt32>
(Rsh32x8 <typ.UInt32>
- hi
+ (Int64Hi x)
(Sub8 <typ.UInt8> s (Const8 <typ.UInt8> [32])))
(Zeromask
(ZeroExt8to32
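
The signed variant adds one wrinkle: the hi>>(s-32) term is an arithmetic shift, so for small counts it can evaluate to -1 rather than 0 and must be masked out explicitly; Zeromask(s>>5) is all ones exactly when s >= 32. In Go (a sketch; signed shifts by oversized counts sign-fill):

    func rsh64x(hi int32, lo, s uint32) (rhi int32, rlo uint32) {
        rhi = hi >> s // arithmetic: oversized counts give 0 or -1
        var zmask uint32
        if s>>5 != 0 { // s >= 32, i.e. Zeromask(s >> 5) is all ones
            zmask = 0xFFFFFFFF
        }
        rlo = lo>>s | uint32(hi)<<(32-s) | uint32(hi>>(s-32))&zmask
        return
    }
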
diff --git a/src/cmd/compile/internal/ssa/gen/generic.rules b/src/cmd/compile/internal/ssa/gen/generic.rules
index 39f8cc8889..4351ef5bdd 100644
--- a/src/cmd/compile/internal/ssa/gen/generic.rules
+++ b/src/cmd/compile/internal/ssa/gen/generic.rules
@@ -1961,6 +1961,31 @@
&& warnRule(fe.Debug_checknil(), v, "removed nil check")
=> (Invalid)
+// for late-expanded calls
+(Zero (SelectN [0] call:(StaticLECall _ _)) mem:(SelectN [1] call))
+ && isSameCall(call.Aux, "runtime.newobject")
+ => mem
+
+(Store (SelectN [0] call:(StaticLECall _ _)) x mem:(SelectN [1] call))
+ && isConstZero(x)
+ && isSameCall(call.Aux, "runtime.newobject")
+ => mem
+
+(Store (OffPtr (SelectN [0] call:(StaticLECall _ _))) x mem:(SelectN [1] call))
+ && isConstZero(x)
+ && isSameCall(call.Aux, "runtime.newobject")
+ => mem
+
+(NilCheck (SelectN [0] call:(StaticLECall _ _)) (SelectN [1] call))
+ && isSameCall(call.Aux, "runtime.newobject")
+ && warnRule(fe.Debug_checknil(), v, "removed nil check")
+ => (Invalid)
+
+(NilCheck (OffPtr (SelectN [0] call:(StaticLECall _ _))) (SelectN [1] call))
+ && isSameCall(call.Aux, "runtime.newobject")
+ && warnRule(fe.Debug_checknil(), v, "removed nil check")
+ => (Invalid)
+
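
These mirror the runtime.newobject rules just above, restated for the late-expanded call form. A Go function they are expected to clean up (a sketch; the annotations are inferred from the rules, not compiler output):

    type T struct{ a, b int64 }

    func g() *T {
        p := new(T) // SelectN [0] of a StaticLECall to runtime.newobject
        p.a = 0     // store of a constant zero into fresh zeroed memory: folded to mem
        return p    // nil checks against p: rewritten to Invalid
    }
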
// Evaluate constant address comparisons.
(EqPtr x x) => (ConstBool [true])
(NeqPtr x x) => (ConstBool [false])
@@ -2017,6 +2042,17 @@
&& clobber(s1, s2, s3)
=> (Move {t.Elem()} [int64(sz)] dst src mem)
+// Inline small or disjoint runtime.memmove calls with constant length.
+// See the comment in op Move in genericOps.go for discussion of the type.
+(SelectN [0] call:(StaticLECall {sym} dst src (Const(64|32) [sz]) mem))
+ && sz >= 0
+ && call.Uses == 1 // this will exclude all calls with results
+ && isSameCall(sym, "runtime.memmove")
+ && dst.Type.IsPtr() // avoids TUINTPTR, see issue 30061
+ && isInlinableMemmove(dst, src, int64(sz), config)
+ && clobber(call)
+ => (Move {dst.Type.Elem()} [int64(sz)] dst src mem)
+
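
A source pattern this rule is intended to catch, assuming the constant length is visible to the compiler (illustrative):

    func copy8(dst, src []byte) {
        copy(dst[:8], src[:8]) // runtime.memmove with sz = 8; the int result is unused,
        //                        so call.Uses == 1 and the call becomes (Move [8] dst src mem)
    }
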
// De-virtualize interface calls into static calls.
// Note that (ITab (IMake)) doesn't get
// rewritten until after the first opt pass,
@@ -2024,6 +2060,13 @@
(InterCall [argsize] {auxCall} (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) mem) && devirt(v, auxCall, itab, off) != nil =>
(StaticCall [int32(argsize)] {devirt(v, auxCall, itab, off)} mem)
+// De-virtualize late-expanded interface calls into late-expanded static calls.
+// Note that (ITab (IMake)) doesn't get rewritten until after the first opt pass,
+// so this rule should trigger reliably.
+// devirtLECall removes the first argument, adds the devirtualized symbol to the AuxCall, and changes the opcode
+(InterLECall [argsize] {auxCall} (Load (OffPtr [off] (ITab (IMake (Addr {itab} (SB)) _))) _) ___) && devirtLESym(v, auxCall, itab, off) !=
+ nil => devirtLECall(v, devirtLESym(v, auxCall, itab, off))
+
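
The matched shape is an interface method call whose itab comes straight from constructing the interface with a concrete type, as in this sketch:

    type I interface{ M() }
    type T struct{}

    func (T) M() {}

    func h() {
        var i I = T{} // IMake with a constant itab for (T, I)
        i.M()         // InterLECall through that itab -> static call of T.M
    }

Whether the rule fires still depends on the (ITab (IMake ...)) form surviving to this pass, per the note above.
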
// Move and Zero optimizations.
// Move source and destination may overlap.
@@ -2404,6 +2447,7 @@
(Store {t5} (OffPtr <tt5> [o5] dst) d4
(Zero {t1} [n] dst mem)))))
+// TODO this does not fire before call expansion; is that acceptable?
(StaticCall {sym} x) && needRaceCleanup(sym, v) => x
// Collapse moving A -> B -> C into just A -> C.
diff --git a/src/cmd/compile/internal/ssa/gen/genericOps.go b/src/cmd/compile/internal/ssa/gen/genericOps.go
index 95edff4c8c..23bd4af2cd 100644
--- a/src/cmd/compile/internal/ssa/gen/genericOps.go
+++ b/src/cmd/compile/internal/ssa/gen/genericOps.go
@@ -389,10 +389,12 @@ var genericOps = []opData{
// TODO(josharian): ClosureCall and InterCall should have Int32 aux
// to match StaticCall's 32 bit arg size limit.
// TODO(drchase,josharian): could the arg size limit be bundled into the rules for CallOff?
- {name: "ClosureCall", argLength: 3, aux: "CallOff", call: true}, // arg0=code pointer, arg1=context ptr, arg2=memory. auxint=arg size. Returns memory.
- {name: "StaticCall", argLength: 1, aux: "CallOff", call: true}, // call function aux.(*obj.LSym), arg0=memory. auxint=arg size. Returns memory.
- {name: "InterCall", argLength: 2, aux: "CallOff", call: true}, // interface call. arg0=code pointer, arg1=memory, auxint=arg size. Returns memory.
- {name: "StaticLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded static call function aux.(*ssa.AuxCall.Fn). arg0..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem.
+ {name: "ClosureCall", argLength: 3, aux: "CallOff", call: true}, // arg0=code pointer, arg1=context ptr, arg2=memory. auxint=arg size. Returns memory.
+ {name: "StaticCall", argLength: 1, aux: "CallOff", call: true}, // call function aux.(*obj.LSym), arg0=memory. auxint=arg size. Returns memory.
+ {name: "InterCall", argLength: 2, aux: "CallOff", call: true}, // interface call. arg0=code pointer, arg1=memory, auxint=arg size. Returns memory.
+ {name: "ClosureLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded closure call. arg0=code pointer, arg1=context ptr, arg2..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem.
+ {name: "StaticLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded static call function aux.(*ssa.AuxCall.Fn). arg0..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem.
+ {name: "InterLECall", argLength: -1, aux: "CallOff", call: true}, // late-expanded interface call. arg0=code pointer, arg1..argN-1 are inputs, argN is mem. auxint = arg size. Result is tuple of result(s), plus mem.
// Conversions: signed extensions, zero (unsigned) extensions, truncations
{name: "SignExt8to16", argLength: 1, typ: "Int16"},
@@ -539,20 +541,22 @@ var genericOps = []opData{
{name: "SelectN", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the auxint'th member.
{name: "SelectNAddr", argLength: 1, aux: "Int64"}, // arg0=tuple, auxint=field index. Returns the address of auxint'th member. Used for un-SSA-able result types.
- // Atomic operations used for semantically inlining runtime/internal/atomic.
- // Atomic loads return a new memory so that the loads are properly ordered
- // with respect to other loads and stores.
- // TODO: use for sync/atomic at some point.
+ // Atomic operations used for semantically inlining sync/atomic and
+ // runtime/internal/atomic. Atomic loads return a new memory so that
+ // the loads are properly ordered with respect to other loads and
+ // stores.
{name: "AtomicLoad8", argLength: 2, typ: "(UInt8,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoad32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoad64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoadPtr", argLength: 2, typ: "(BytePtr,Mem)"}, // Load from arg0. arg1=memory. Returns loaded value and new memory.
{name: "AtomicLoadAcq32", argLength: 2, typ: "(UInt32,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory.
+ {name: "AtomicLoadAcq64", argLength: 2, typ: "(UInt64,Mem)"}, // Load from arg0. arg1=memory. Lock acquisition, returns loaded value and new memory.
{name: "AtomicStore8", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory.
{name: "AtomicStore32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory.
{name: "AtomicStore64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory.
{name: "AtomicStorePtrNoWB", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns memory.
{name: "AtomicStoreRel32", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory.
+ {name: "AtomicStoreRel64", argLength: 3, typ: "Mem", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Lock release, returns memory.
{name: "AtomicExchange32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicExchange64", argLength: 3, typ: "(UInt64,Mem)", hasSideEffects: true}, // Store arg1 to *arg0. arg2=memory. Returns old contents of *arg0 and new memory.
{name: "AtomicAdd32", argLength: 3, typ: "(UInt32,Mem)", hasSideEffects: true}, // Do *arg0 += arg1. arg2=memory. Returns sum and new memory.
@@ -561,7 +565,9 @@ var genericOps = []opData{
{name: "AtomicCompareAndSwap64", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Returns true if store happens and new memory.
{name: "AtomicCompareAndSwapRel32", argLength: 4, typ: "(Bool,Mem)", hasSideEffects: true}, // if *arg0==arg1, then set *arg0=arg2. Lock release, reports whether store happens and new memory.
{name: "AtomicAnd8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
+ {name: "AtomicAnd32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 &= arg1. arg2=memory. Returns memory.
{name: "AtomicOr8", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
+ {name: "AtomicOr32", argLength: 3, typ: "Mem", hasSideEffects: true}, // *arg0 |= arg1. arg2=memory. Returns memory.
// Atomic operation variants
// These variants have the same semantics as above atomic operations.
diff --git a/src/cmd/compile/internal/ssa/gen/rulegen.go b/src/cmd/compile/internal/ssa/gen/rulegen.go
index be51a7c5f8..120ccbbdb3 100644
--- a/src/cmd/compile/internal/ssa/gen/rulegen.go
+++ b/src/cmd/compile/internal/ssa/gen/rulegen.go
@@ -35,8 +35,7 @@ import (
)
// rule syntax:
-// sexpr [&& extra conditions] -> [@block] sexpr (untyped)
-// sexpr [&& extra conditions] => [@block] sexpr (typed)
+// sexpr [&& extra conditions] => [@block] sexpr
//
// sexpr are s-expressions (lisp-like parenthesized groupings)
// sexpr ::= [variable:](opcode sexpr*)
@@ -50,8 +49,12 @@ import (
// variable ::= some token
// opcode ::= one of the opcodes from the *Ops.go files
+// special rules: a trailing ellipsis "..." in the outermost sexpr must match on both sides of a rule.
+// a trailing triple underscore "___" in the outermost match sexpr indicates the presence of
+// extra ignored args that need not appear in the replacement.
+
// extra conditions is just a chunk of Go that evaluates to a boolean. It may use
-// variables declared in the matching sexpr. The variable "v" is predefined to be
+// variables declared in the matching sexpr. The variable "v" is predefined to be
// the value matched by the entire rule.
// If multiple rules match, the first one in file order is selected.
@@ -75,14 +78,8 @@ func normalizeSpaces(s string) string {
}
// parse returns the matching part of the rule, additional conditions, and the result.
-// parse also reports whether the generated code should use strongly typed aux and auxint fields.
-func (r Rule) parse() (match, cond, result string, typed bool) {
- arrow := "->"
- if strings.Contains(r.Rule, "=>") {
- arrow = "=>"
- typed = true
- }
- s := strings.Split(r.Rule, arrow)
+func (r Rule) parse() (match, cond, result string) {
+ s := strings.Split(r.Rule, "=>")
match = normalizeSpaces(s[0])
result = normalizeSpaces(s[1])
cond = ""
@@ -90,7 +87,7 @@ func (r Rule) parse() (match, cond, result string, typed bool) {
cond = normalizeSpaces(match[i+2:])
match = normalizeSpaces(match[:i])
}
- return match, cond, result, typed
+ return match, cond, result
}
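
For example, the RISCV64 rule above, (ADD (MOVBconst [val]) x) => (ADDI [int64(val)] x), splits as:

    match:  (ADD (MOVBconst [val]) x)
    cond:   (empty)
    result: (ADDI [int64(val)] x)

A rule containing "&&" would put everything after it into cond.
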
func genRules(arch arch) { genRulesSuffix(arch, "") }
@@ -116,7 +113,7 @@ func genRulesSuffix(arch arch, suff string) {
scanner := bufio.NewScanner(text)
rule := ""
var lineno int
- var ruleLineno int // line number of "->" or "=>"
+ var ruleLineno int // line number of "=>"
for scanner.Scan() {
lineno++
line := scanner.Text()
@@ -130,13 +127,13 @@ func genRulesSuffix(arch arch, suff string) {
if rule == "" {
continue
}
- if !strings.Contains(rule, "->") && !strings.Contains(rule, "=>") {
+ if !strings.Contains(rule, "=>") {
continue
}
if ruleLineno == 0 {
ruleLineno = lineno
}
- if strings.HasSuffix(rule, "->") || strings.HasSuffix(rule, "=>") {
+ if strings.HasSuffix(rule, "=>") {
continue // continue on the next line
}
if n := balance(rule); n > 0 {
@@ -153,7 +150,7 @@ func genRulesSuffix(arch arch, suff string) {
continue
}
// Do fancier value op matching.
- match, _, _, _ := r.parse()
+ match, _, _ := r.parse()
op, oparch, _, _, _, _ := parseValue(match, arch, loc)
opname := fmt.Sprintf("Op%s%s", oparch, op.name)
oprules[opname] = append(oprules[opname], r)
@@ -227,7 +224,7 @@ func genRulesSuffix(arch arch, suff string) {
log.Fatalf("unconditional rule %s is followed by other rules", rr.Match)
}
rr = &RuleRewrite{Loc: rule.Loc}
- rr.Match, rr.Cond, rr.Result, rr.Typed = rule.parse()
+ rr.Match, rr.Cond, rr.Result = rule.parse()
pos, _ := genMatch(rr, arch, rr.Match, fn.ArgLen >= 0)
if pos == "" {
pos = "v.Pos"
@@ -786,7 +783,6 @@ type (
Alloc int // for unique var names
Loc string // file name & line number of the original rule
CommuteDepth int // used to track depth of commute loops
- Typed bool // aux and auxint fields should be strongly typed
}
Declare struct {
Name string
@@ -840,7 +836,7 @@ func breakf(format string, a ...interface{}) *CondBreak {
func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
rr := &RuleRewrite{Loc: rule.Loc}
- rr.Match, rr.Cond, rr.Result, rr.Typed = rule.parse()
+ rr.Match, rr.Cond, rr.Result = rule.parse()
_, _, auxint, aux, s := extract(rr.Match) // remove parens, then split
// check match of control values
@@ -884,15 +880,6 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
if e.name == "" {
continue
}
- if !rr.Typed {
- if !token.IsIdentifier(e.name) || rr.declared(e.name) {
- // code or variable
- rr.add(breakf("b.%s != %s", e.field, e.name))
- } else {
- rr.add(declf(e.name, "b.%s", e.field))
- }
- continue
- }
if e.dclType == "" {
log.Fatalf("op %s has no declared type for %s", data.name, e.field)
@@ -961,20 +948,12 @@ func genBlockRewrite(rule Rule, arch arch, data blockData) *RuleRewrite {
}
if auxint != "" {
- if rr.Typed {
- // Make sure auxint value has the right type.
- rr.add(stmtf("b.AuxInt = %sToAuxInt(%s)", unTitle(outdata.auxIntType()), auxint))
- } else {
- rr.add(stmtf("b.AuxInt = %s", auxint))
- }
+ // Make sure auxint value has the right type.
+ rr.add(stmtf("b.AuxInt = %sToAuxInt(%s)", unTitle(outdata.auxIntType()), auxint))
}
if aux != "" {
- if rr.Typed {
- // Make sure aux value has the right type.
- rr.add(stmtf("b.Aux = %sToAux(%s)", unTitle(outdata.auxType()), aux))
- } else {
- rr.add(stmtf("b.Aux = %s", aux))
- }
+ // Make sure aux value has the right type.
+ rr.add(stmtf("b.Aux = %sToAux(%s)", unTitle(outdata.auxType()), aux))
}
succChanged := false
@@ -1019,6 +998,19 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int,
pos = v + ".Pos"
}
+ // If the last argument is ___, it means "don't care about any further (trailing) arguments".
+ // The likely/intended use is for rewrites that are too tricky to express in the existing pattern language.
+ // Do a length check early, because long patterns fed short (ultimately non-matching) inputs
+ // would otherwise cause an indexing error during pattern matching.
+ if op.argLength == -1 {
+ l := len(args)
+ if l == 0 || args[l-1] != "___" {
+ rr.add(breakf("len(%s.Args) != %d", v, l))
+ } else if l > 1 && args[l-1] == "___" {
+ rr.add(breakf("len(%s.Args) < %d", v, l-1))
+ }
+ }
+
for _, e := range []struct {
name, field, dclType string
}{
@@ -1029,15 +1021,6 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int,
if e.name == "" {
continue
}
- if !rr.Typed {
- if !token.IsIdentifier(e.name) || rr.declared(e.name) {
- // code or variable
- rr.add(breakf("%s.%s != %s", v, e.field, e.name))
- } else {
- rr.add(declf(e.name, "%s.%s", v, e.field))
- }
- continue
- }
if e.dclType == "" {
log.Fatalf("op %s has no declared type for %s", op.name, e.field)
@@ -1159,9 +1142,6 @@ func genMatch0(rr *RuleRewrite, arch arch, match, v string, cnt map[string]int,
}
}
- if op.argLength == -1 {
- rr.add(breakf("len(%s.Args) != %d", v, len(args)))
- }
return pos, checkOp
}
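
With the arity check hoisted ahead of argument matching, the guards emitted for a variadic (argLength == -1) op look like this (a sketch of the generated code, not verbatim rulegen output):

    // for a match like (StaticLECall f mem): exact arity required
    if len(v.Args) != 2 {
        break
    }
    // for a match like (InterLECall code ___): at least the named args
    if len(v.Args) < 1 {
        break
    }
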
@@ -1230,20 +1210,12 @@ func genResult0(rr *RuleRewrite, arch arch, result string, top, move bool, pos s
}
if auxint != "" {
- if rr.Typed {
- // Make sure auxint value has the right type.
- rr.add(stmtf("%s.AuxInt = %sToAuxInt(%s)", v, unTitle(op.auxIntType()), auxint))
- } else {
- rr.add(stmtf("%s.AuxInt = %s", v, auxint))
- }
+ // Make sure auxint value has the right type.
+ rr.add(stmtf("%s.AuxInt = %sToAuxInt(%s)", v, unTitle(op.auxIntType()), auxint))
}
if aux != "" {
- if rr.Typed {
- // Make sure aux value has the right type.
- rr.add(stmtf("%s.Aux = %sToAux(%s)", v, unTitle(op.auxType()), aux))
- } else {
- rr.add(stmtf("%s.Aux = %s", v, aux))
- }
+ // Make sure aux value has the right type.
+ rr.add(stmtf("%s.Aux = %sToAux(%s)", v, unTitle(op.auxType()), aux))
}
all := new(strings.Builder)
for i, arg := range args {
@@ -1524,7 +1496,7 @@ func excludeFromExpansion(s string, idx []int) bool {
return true
}
right := s[idx[1]:]
- if strings.Contains(left, "&&") && (strings.Contains(right, "->") || strings.Contains(right, "=>")) {
+ if strings.Contains(left, "&&") && strings.Contains(right, "=>") {
// Inside && conditions.
return true
}
@@ -1626,7 +1598,6 @@ func normalizeWhitespace(x string) string {
x = strings.Replace(x, " )", ")", -1)
x = strings.Replace(x, "[ ", "[", -1)
x = strings.Replace(x, " ]", "]", -1)
- x = strings.Replace(x, ")->", ") ->", -1)
x = strings.Replace(x, ")=>", ") =>", -1)
return x
}
@@ -1683,7 +1654,7 @@ func parseEllipsisRules(rules []Rule, arch arch) (newop string, ok bool) {
return "", false
}
rule := rules[0]
- match, cond, result, _ := rule.parse()
+ match, cond, result := rule.parse()
if cond != "" || !isEllipsisValue(match) || !isEllipsisValue(result) {
if strings.Contains(rule.Rule, "...") {
log.Fatalf("%s: found ellipsis in non-ellipsis rule", rule.Loc)
@@ -1708,7 +1679,7 @@ func isEllipsisValue(s string) bool {
}
func checkEllipsisRuleCandidate(rule Rule, arch arch) {
- match, cond, result, _ := rule.parse()
+ match, cond, result := rule.parse()
if cond != "" {
return
}
@@ -1718,7 +1689,7 @@ func checkEllipsisRuleCandidate(rule Rule, arch arch) {
var usingCopy string
var eop opData
if result[0] != '(' {
- // Check for (Foo x) -> x, which can be converted to (Foo ...) -> (Copy ...).
+ // Check for (Foo x) => x, which can be converted to (Foo ...) => (Copy ...).
args2 = []string{result}
usingCopy = " using Copy"
} else {